1use std::ffi;
36use std::os::raw::c_char;
37use std::slice;
38use std::str;
39use std::sync::Arc;
40
41use crate::htslib;
42
43use linear_map::LinearMap;
44
45use crate::errors::{Error, Result};
46
/// Per-sample index map produced when a header is subset.
///
/// `Header::from_template_subset` allocates one `i32` slot per requested sample, hands the
/// buffer to `bcf_hdr_subset` to fill in, and stores the result in `Header::subset`.
/// NOTE(review): presumably each entry is the sample's index in the template header — confirm
/// against the htslib documentation of `bcf_hdr_subset`.
pub type SampleSubset = Vec<i32>;
48
custom_derive! {
    /// Newtype around the numeric (`u32`) index of an entry in one of the header dictionaries.
    ///
    /// Values are produced by `HeaderView::name_to_id` / `HeaderView::sample_to_id` and consumed
    /// by `HeaderView::id_to_name` / `HeaderView::id_to_sample`.
    #[derive(
        NewtypeFrom,
        NewtypeDeref,
        PartialEq,
        PartialOrd,
        Eq,
        Ord,
        Copy,
        Clone,
        Debug
    )]
    pub struct Id(pub u32);
}
64
/// An owned, mutable htslib BCF/VCF header (`bcf_hdr_t`).
///
/// The wrapped pointer is released with `bcf_hdr_destroy` when the `Header` is dropped.
#[derive(Debug)]
pub struct Header {
    /// Raw pointer to the underlying htslib header.
    pub(crate) inner: *mut htslib::bcf_hdr_t,
    /// Index map filled in by `bcf_hdr_subset` when the header was built with
    /// `from_template_subset`; `None` for every other constructor.
    pub subset: Option<SampleSubset>,
}

// Raw pointers are neither `Send` nor `Sync` automatically, so both are asserted by hand.
// NOTE(review): soundness relies on `Header` being the sole owner of `inner` and on htslib not
// mutating the header behind a shared reference — confirm.
unsafe impl Send for Header {}
unsafe impl Sync for Header {}
74
75impl Default for Header {
76 fn default() -> Self {
77 Self::new()
78 }
79}
80
81impl Header {
82 pub fn new() -> Self {
84 let c_str = ffi::CString::new(&b"w"[..]).unwrap();
85 Header {
86 inner: unsafe { htslib::bcf_hdr_init(c_str.as_ptr()) },
87 subset: None,
88 }
89 }
90
91 pub unsafe fn inner_ptr(&self) -> *mut htslib::bcf_hdr_t {
97 self.inner
98 }
99
100 pub fn from_template(header: &HeaderView) -> Self {
108 Header {
109 inner: unsafe { htslib::bcf_hdr_dup(header.inner) },
110 subset: None,
111 }
112 }
113
114 pub fn from_template_subset(header: &HeaderView, samples: &[&[u8]]) -> Result<Self> {
122 let mut imap = vec![0; samples.len()];
123 let names: Vec<_> = samples
124 .iter()
125 .map(|&s| ffi::CString::new(s).unwrap())
126 .collect();
127 let name_pointers: Vec<_> = names.iter().map(|s| s.as_ptr() as *mut i8).collect();
128 #[allow(clippy::unnecessary_cast)]
129 let name_pointers_ptr = name_pointers.as_ptr() as *const *mut c_char;
130 let inner = unsafe {
131 htslib::bcf_hdr_subset(
132 header.inner,
133 samples.len() as i32,
134 name_pointers_ptr,
135 imap.as_mut_ptr(),
136 )
137 };
138 if inner.is_null() {
139 Err(Error::BcfDuplicateSampleNames)
140 } else {
141 Ok(Header {
142 inner,
143 subset: Some(imap),
144 })
145 }
146 }
147
148 pub fn push_sample(&mut self, sample: &[u8]) -> &mut Self {
154 let c_str = ffi::CString::new(sample).unwrap();
155 unsafe { htslib::bcf_hdr_add_sample(self.inner, c_str.as_ptr()) };
156 self
157 }
158
159 pub fn push_record(&mut self, record: &[u8]) -> &mut Self {
171 let c_str = ffi::CString::new(record).unwrap();
172 unsafe { htslib::bcf_hdr_append(self.inner, c_str.as_ptr()) };
173 self
174 }
175
176 pub fn remove_filter(&mut self, tag: &[u8]) -> &mut Self {
182 self.remove_impl(tag, htslib::BCF_HL_FLT)
183 }
184
185 pub fn remove_info(&mut self, tag: &[u8]) -> &mut Self {
191 self.remove_impl(tag, htslib::BCF_HL_INFO)
192 }
193
194 pub fn remove_format(&mut self, tag: &[u8]) -> &mut Self {
200 self.remove_impl(tag, htslib::BCF_HL_FMT)
201 }
202
203 pub fn remove_contig(&mut self, tag: &[u8]) -> &mut Self {
209 self.remove_impl(tag, htslib::BCF_HL_CTG)
210 }
211
212 pub fn remove_structured(&mut self, tag: &[u8]) -> &mut Self {
218 self.remove_impl(tag, htslib::BCF_HL_STR)
219 }
220
221 pub fn remove_generic(&mut self, tag: &[u8]) -> &mut Self {
227 self.remove_impl(tag, htslib::BCF_HL_GEN)
228 }
229
230 fn remove_impl(&mut self, tag: &[u8], type_: u32) -> &mut Self {
232 unsafe {
233 let v = tag.to_vec();
234 let c_str = ffi::CString::new(v).unwrap();
235 htslib::bcf_hdr_remove(self.inner, type_ as i32, c_str.as_ptr());
236 }
237 self
238 }
239}
240
241impl Drop for Header {
242 fn drop(&mut self) {
243 unsafe { htslib::bcf_hdr_destroy(self.inner) };
244 }
245}
246
/// An owned copy of one header line, as returned by `HeaderView::header_records`.
///
/// The variant is chosen from the htslib record's `type_` field; every variant except
/// `Generic` carries the record's key/value pairs in their original order.
#[derive(Debug)]
pub enum HeaderRecord {
    /// Record whose htslib type is 0.
    Filter {
        /// The record's key.
        key: String,
        /// The record's key/value pairs.
        values: LinearMap<String, String>,
    },
    /// Record whose htslib type is 1.
    Info {
        /// The record's key.
        key: String,
        /// The record's key/value pairs.
        values: LinearMap<String, String>,
    },
    /// Record whose htslib type is 2.
    Format {
        /// The record's key.
        key: String,
        /// The record's key/value pairs.
        values: LinearMap<String, String>,
    },
    /// Record whose htslib type is 3.
    Contig {
        /// The record's key.
        key: String,
        /// The record's key/value pairs.
        values: LinearMap<String, String>,
    },
    /// Record whose htslib type is 4.
    Structured {
        /// The record's key.
        key: String,
        /// The record's key/value pairs.
        values: LinearMap<String, String>,
    },
    /// Record whose htslib type is 5: a plain key with a single value.
    Generic { key: String, value: String },
}
278
/// Read-oriented wrapper around an htslib BCF/VCF header (`bcf_hdr_t`).
///
/// The view owns the pointer: cloning duplicates the header with `bcf_hdr_dup`, and dropping
/// the view destroys it with `bcf_hdr_destroy`.
#[derive(Debug)]
pub struct HeaderView {
    /// Raw pointer to the underlying htslib header.
    pub(crate) inner: *mut htslib::bcf_hdr_t,
}

// Raw pointers are neither `Send` nor `Sync` automatically, so both are asserted by hand.
// NOTE(review): soundness relies on the header not being mutated while shared — confirm.
unsafe impl Send for HeaderView {}
unsafe impl Sync for HeaderView {}
286
287impl HeaderView {
288 pub unsafe fn from_ptr(inner: *mut htslib::bcf_hdr_t) -> Self {
293 HeaderView { inner }
294 }
295
296 pub unsafe fn as_ptr(&self) -> *mut htslib::bcf_hdr_t {
302 self.inner
303 }
304
305 #[inline]
306 fn inner(&self) -> htslib::bcf_hdr_t {
307 unsafe { *self.inner }
308 }
309
310 pub fn sample_count(&self) -> u32 {
312 self.inner().n[htslib::BCF_DT_SAMPLE as usize] as u32
313 }
314
315 pub fn samples(&self) -> Vec<&[u8]> {
317 let names =
318 unsafe { slice::from_raw_parts(self.inner().samples, self.sample_count() as usize) };
319 names
320 .iter()
321 .map(|name| unsafe { ffi::CStr::from_ptr(*name).to_bytes() })
322 .collect()
323 }
324
325 pub fn sample_id(&self, sample: &[u8]) -> Option<usize> {
328 self.samples().iter().position(|s| *s == sample)
329 }
330
331 pub fn contig_count(&self) -> u32 {
333 self.inner().n[htslib::BCF_DT_CTG as usize] as u32
334 }
335
336 pub fn rid2name(&self, rid: u32) -> Result<&[u8]> {
337 if rid <= self.contig_count() {
338 unsafe {
339 let dict = self.inner().id[htslib::BCF_DT_CTG as usize];
340 let ptr = (*dict.offset(rid as isize)).key;
341 Ok(ffi::CStr::from_ptr(ptr).to_bytes())
342 }
343 } else {
344 Err(Error::BcfUnknownRID { rid })
345 }
346 }
347
348 pub fn name2rid(&self, name: &[u8]) -> Result<u32> {
368 let c_str = ffi::CString::new(name).unwrap();
369 unsafe {
370 match htslib::bcf_hdr_id2int(
371 self.inner,
372 htslib::BCF_DT_CTG as i32,
373 c_str.as_ptr() as *mut c_char,
374 ) {
375 -1 => Err(Error::BcfUnknownContig {
376 contig: str::from_utf8(name).unwrap().to_owned(),
377 }),
378 i => Ok(i as u32),
379 }
380 }
381 }
382
383 pub fn info_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
384 self.tag_type(tag, htslib::BCF_HL_INFO)
385 }
386
387 pub fn format_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
388 self.tag_type(tag, htslib::BCF_HL_FMT)
389 }
390
391 fn tag_type(&self, tag: &[u8], hdr_type: ::libc::c_uint) -> Result<(TagType, TagLength)> {
392 let tag_desc = || str::from_utf8(tag).unwrap().to_owned();
393 let c_str_tag = ffi::CString::new(tag).unwrap();
394 let (_type, length, num_values) = unsafe {
395 let id = htslib::bcf_hdr_id2int(
396 self.inner,
397 htslib::BCF_DT_ID as i32,
398 c_str_tag.as_ptr() as *mut c_char,
399 );
400 if id < 0 {
401 return Err(Error::BcfUndefinedTag { tag: tag_desc() });
402 }
403 let n = (*self.inner).n[htslib::BCF_DT_ID as usize] as usize;
404 let entry = slice::from_raw_parts((*self.inner).id[htslib::BCF_DT_ID as usize], n);
405 let d = (*entry[id as usize].val).info[hdr_type as usize];
406 ((d >> 4) & 0xf, (d >> 8) & 0xf, d >> 12)
407 };
408 let _type = match _type as ::libc::c_uint {
409 htslib::BCF_HT_FLAG => TagType::Flag,
410 htslib::BCF_HT_INT => TagType::Integer,
411 htslib::BCF_HT_REAL => TagType::Float,
412 htslib::BCF_HT_STR => TagType::String,
413 _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }),
414 };
415 let length = match length as ::libc::c_uint {
416 htslib::BCF_VL_FIXED => TagLength::Fixed(num_values as u32),
418 htslib::BCF_VL_VAR => TagLength::Variable,
419 htslib::BCF_VL_A => TagLength::AltAlleles,
420 htslib::BCF_VL_R => TagLength::Alleles,
421 htslib::BCF_VL_G => TagLength::Genotypes,
422 _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }),
423 };
424
425 Ok((_type, length))
426 }
427
428 pub fn name_to_id(&self, id: &[u8]) -> Result<Id> {
430 let c_str = ffi::CString::new(id).unwrap();
431 unsafe {
432 match htslib::bcf_hdr_id2int(
433 self.inner,
434 htslib::BCF_DT_ID as i32,
435 c_str.as_ptr() as *const c_char,
436 ) {
437 -1 => Err(Error::BcfUnknownID {
438 id: str::from_utf8(id).unwrap().to_owned(),
439 }),
440 i => Ok(Id(i as u32)),
441 }
442 }
443 }
444
445 pub fn id_to_name(&self, id: Id) -> Vec<u8> {
448 let key = unsafe {
449 ffi::CStr::from_ptr(
450 (*(*self.inner).id[htslib::BCF_DT_ID as usize].offset(*id as isize)).key,
451 )
452 };
453 key.to_bytes().to_vec()
454 }
455
456 pub fn sample_to_id(&self, id: &[u8]) -> Result<Id> {
458 let c_str = ffi::CString::new(id).unwrap();
459 unsafe {
460 match htslib::bcf_hdr_id2int(
461 self.inner,
462 htslib::BCF_DT_SAMPLE as i32,
463 c_str.as_ptr() as *const c_char,
464 ) {
465 -1 => Err(Error::BcfUnknownSample {
466 name: str::from_utf8(id).unwrap().to_owned(),
467 }),
468 i => Ok(Id(i as u32)),
469 }
470 }
471 }
472
473 pub fn id_to_sample(&self, id: Id) -> Vec<u8> {
475 let key = unsafe {
476 ffi::CStr::from_ptr(
477 (*(*self.inner).id[htslib::BCF_DT_SAMPLE as usize].offset(*id as isize)).key,
478 )
479 };
480 key.to_bytes().to_vec()
481 }
482
483 pub fn header_records(&self) -> Vec<HeaderRecord> {
485 fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap<String, String> {
486 let mut result: LinearMap<String, String> = LinearMap::new();
487 for i in 0_i32..(rec.nkeys) {
488 let key = unsafe {
489 ffi::CStr::from_ptr(*rec.keys.offset(i as isize))
490 .to_str()
491 .unwrap()
492 .to_string()
493 };
494 let value = unsafe {
495 ffi::CStr::from_ptr(*rec.vals.offset(i as isize))
496 .to_str()
497 .unwrap()
498 .to_string()
499 };
500 result.insert(key, value);
501 }
502 result
503 }
504
505 let mut result: Vec<HeaderRecord> = Vec::new();
506 for i in 0_i32..unsafe { (*self.inner).nhrec } {
507 let rec = unsafe { &(**(*self.inner).hrec.offset(i as isize)) };
508 let key = unsafe { ffi::CStr::from_ptr(rec.key).to_str().unwrap().to_string() };
509 let record = match rec.type_ {
510 0 => HeaderRecord::Filter {
511 key,
512 values: parse_kv(rec),
513 },
514 1 => HeaderRecord::Info {
515 key,
516 values: parse_kv(rec),
517 },
518 2 => HeaderRecord::Format {
519 key,
520 values: parse_kv(rec),
521 },
522 3 => HeaderRecord::Contig {
523 key,
524 values: parse_kv(rec),
525 },
526 4 => HeaderRecord::Structured {
527 key,
528 values: parse_kv(rec),
529 },
530 5 => HeaderRecord::Generic {
531 key,
532 value: unsafe { ffi::CStr::from_ptr(rec.value).to_str().unwrap().to_string() },
533 },
534 _ => panic!("Unknown type: {}", rec.type_),
535 };
536 result.push(record);
537 }
538 result
539 }
540
541 pub fn empty_record(self: &Arc<Self>) -> crate::bcf::Record {
545 crate::bcf::Record::new(self.clone())
546 }
547}
548
549impl Clone for HeaderView {
550 fn clone(&self) -> Self {
551 HeaderView {
552 inner: unsafe { htslib::bcf_hdr_dup(self.inner) },
553 }
554 }
555}
556
557impl Drop for HeaderView {
558 fn drop(&mut self) {
559 unsafe {
560 htslib::bcf_hdr_destroy(self.inner);
561 }
562 }
563}
564
/// Value type of an INFO/FORMAT tag, as decoded by `HeaderView::info_type` and
/// `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagType {
    /// Decoded from `BCF_HT_FLAG`.
    Flag,
    /// Decoded from `BCF_HT_INT`.
    Integer,
    /// Decoded from `BCF_HT_REAL`.
    Float,
    /// Decoded from `BCF_HT_STR`.
    String,
}
572
/// Declared number of values of an INFO/FORMAT tag, as decoded by `HeaderView::info_type` and
/// `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagLength {
    /// Decoded from `BCF_VL_FIXED`; carries the value count stored in the header.
    Fixed(u32),
    /// Decoded from `BCF_VL_A`.
    AltAlleles,
    /// Decoded from `BCF_VL_R`.
    Alleles,
    /// Decoded from `BCF_VL_G`.
    Genotypes,
    /// Decoded from `BCF_VL_VAR`.
    Variable,
}
581
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bcf::Reader;
    use crate::htslib;

    /// An empty record created from a header view reports the default field values.
    #[test]
    fn test_header_view_empty_record() {
        // Open a VCF file to obtain a header view.
        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file");
        let header_view = vcf.header.clone();

        // Create an empty record from that header view.
        let record = header_view.empty_record();
        eprintln!("{:?}", record.rid());

        // Check the values the freshly created record reports.
        assert_eq!(record.rid(), Some(0));
        assert_eq!(record.pos(), 0);
        assert_eq!(record.qual(), 0.0);
    }

    /// A sample added through the raw pointer is visible through a `HeaderView` over it.
    #[test]
    fn test_header_add_sample_via_raw_pointer() {
        let sample_name = b"test-sample";

        let header = Header::new();
        let sample = std::ffi::CString::new(sample_name).unwrap();

        let view = unsafe {
            let ptr = header.inner_ptr();
            // Skip `Header`'s destructor: the pointer is handed to the `HeaderView` below,
            // whose own `Drop` destroys it, so it must not be destroyed twice.
            std::mem::forget(header);
            htslib::bcf_hdr_add_sample(ptr, sample.as_ptr());
            htslib::bcf_hdr_sync(ptr);
            HeaderView::from_ptr(ptr)
        };

        assert_eq!(view.samples(), vec![sample_name]);
    }

    /// The raw pointer from `as_ptr` can be passed to htslib to read the header version.
    #[test]
    fn test_header_view_version_via_raw_pointer() {
        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file");
        let hv = vcf.header.clone();

        let version = unsafe {
            let ptr = hv.as_ptr();
            // `hv` stays alive until the end of the test, so `ptr` and the returned string
            // remain valid while they are used.
            let version_charptr = htslib::bcf_hdr_get_version(ptr);
            std::ffi::CStr::from_ptr(version_charptr).to_str().unwrap()
        };

        assert_eq!(version, "VCFv4.1");
    }
}