1use std::ffi;
36use std::os::raw::c_char;
37use std::rc::Rc;
38use std::slice;
39use std::str;
40
41use crate::htslib;
42
43use linear_map::LinearMap;
44
45use crate::errors::{Error, Result};
46
/// Sample index map kept alongside a header that was restricted to a subset
/// of samples.
///
/// `Header::from_template_subset` allocates one entry per requested sample,
/// hands the buffer to `bcf_hdr_subset` as its `imap` argument and stores the
/// result in `Header::subset`.
pub type SampleSubset = Vec<i32>;
48
custom_derive! {
    /// Newtype around a `u32` index into one of the header dictionaries.
    ///
    /// Values are produced by `HeaderView::name_to_id` and
    /// `HeaderView::sample_to_id`, and consumed by `HeaderView::id_to_name`
    /// and `HeaderView::id_to_sample`.
    // NOTE(review): `NewtypeFrom` / `NewtypeDeref` are expanded by
    // `custom_derive!`; presumably they provide the `From` and `Deref`
    // conversions to `u32` (the code below dereferences an `Id` with `*id`).
    #[derive(
        NewtypeFrom,
        NewtypeDeref,
        PartialEq,
        PartialOrd,
        Eq,
        Ord,
        Copy,
        Clone,
        Debug
    )]
    pub struct Id(pub u32);
}
64
/// Owning handle to a raw htslib `bcf_hdr_t` that can be edited (samples and
/// header lines added or removed).
///
/// The wrapped pointer is released with `bcf_hdr_destroy` when the `Header`
/// is dropped.
#[derive(Debug)]
pub struct Header {
    /// Raw htslib header pointer owned by this value.
    pub inner: *mut htslib::bcf_hdr_t,
    /// Index map filled in by `bcf_hdr_subset` when the header was built with
    /// `from_template_subset`; `None` for every other constructor.
    pub subset: Option<SampleSubset>,
}
71
72impl Default for Header {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
78impl Header {
79 pub fn new() -> Self {
81 let c_str = ffi::CString::new(&b"w"[..]).unwrap();
82 Header {
83 inner: unsafe { htslib::bcf_hdr_init(c_str.as_ptr()) },
84 subset: None,
85 }
86 }
87
88 pub fn from_template(header: &HeaderView) -> Self {
96 Header {
97 inner: unsafe { htslib::bcf_hdr_dup(header.inner) },
98 subset: None,
99 }
100 }
101
102 pub fn from_template_subset(header: &HeaderView, samples: &[&[u8]]) -> Result<Self> {
110 let mut imap = vec![0; samples.len()];
111 let names: Vec<_> = samples
112 .iter()
113 .map(|&s| ffi::CString::new(s).unwrap())
114 .collect();
115 let name_pointers: Vec<_> = names.iter().map(|s| s.as_ptr() as *mut i8).collect();
116 #[allow(clippy::unnecessary_cast)]
117 let name_pointers_ptr = name_pointers.as_ptr() as *const *mut c_char;
118 let inner = unsafe {
119 htslib::bcf_hdr_subset(
120 header.inner,
121 samples.len() as i32,
122 name_pointers_ptr,
123 imap.as_mut_ptr(),
124 )
125 };
126 if inner.is_null() {
127 Err(Error::BcfDuplicateSampleNames)
128 } else {
129 Ok(Header {
130 inner,
131 subset: Some(imap),
132 })
133 }
134 }
135
136 pub fn push_sample(&mut self, sample: &[u8]) -> &mut Self {
142 let c_str = ffi::CString::new(sample).unwrap();
143 unsafe { htslib::bcf_hdr_add_sample(self.inner, c_str.as_ptr()) };
144 self
145 }
146
147 pub fn push_record(&mut self, record: &[u8]) -> &mut Self {
159 let c_str = ffi::CString::new(record).unwrap();
160 unsafe { htslib::bcf_hdr_append(self.inner, c_str.as_ptr()) };
161 self
162 }
163
164 pub fn remove_filter(&mut self, tag: &[u8]) -> &mut Self {
170 self.remove_impl(tag, htslib::BCF_HL_FLT)
171 }
172
173 pub fn remove_info(&mut self, tag: &[u8]) -> &mut Self {
179 self.remove_impl(tag, htslib::BCF_HL_INFO)
180 }
181
182 pub fn remove_format(&mut self, tag: &[u8]) -> &mut Self {
188 self.remove_impl(tag, htslib::BCF_HL_FMT)
189 }
190
191 pub fn remove_contig(&mut self, tag: &[u8]) -> &mut Self {
197 self.remove_impl(tag, htslib::BCF_HL_CTG)
198 }
199
200 pub fn remove_structured(&mut self, tag: &[u8]) -> &mut Self {
206 self.remove_impl(tag, htslib::BCF_HL_STR)
207 }
208
209 pub fn remove_generic(&mut self, tag: &[u8]) -> &mut Self {
215 self.remove_impl(tag, htslib::BCF_HL_GEN)
216 }
217
218 fn remove_impl(&mut self, tag: &[u8], type_: u32) -> &mut Self {
220 unsafe {
221 let v = tag.to_vec();
222 let c_str = ffi::CString::new(v).unwrap();
223 htslib::bcf_hdr_remove(self.inner, type_ as i32, c_str.as_ptr());
224 }
225 self
226 }
227}
228
229impl Drop for Header {
230 fn drop(&mut self) {
231 unsafe { htslib::bcf_hdr_destroy(self.inner) };
232 }
233}
234
/// One header line as returned by `HeaderView::header_records`.
///
/// The variant is chosen from the record's numeric `type_` field (0 through 5,
/// in the order the variants are declared here).
#[derive(Debug)]
pub enum HeaderRecord {
    /// Record with type code 0.
    Filter {
        /// The record's `key` string.
        key: String,
        /// Key/value pairs read from the record's `keys` / `vals` arrays.
        values: LinearMap<String, String>,
    },
    /// Record with type code 1.
    Info {
        /// The record's `key` string.
        key: String,
        /// Key/value pairs read from the record's `keys` / `vals` arrays.
        values: LinearMap<String, String>,
    },
    /// Record with type code 2.
    Format {
        /// The record's `key` string.
        key: String,
        /// Key/value pairs read from the record's `keys` / `vals` arrays.
        values: LinearMap<String, String>,
    },
    /// Record with type code 3.
    Contig {
        /// The record's `key` string.
        key: String,
        /// Key/value pairs read from the record's `keys` / `vals` arrays.
        values: LinearMap<String, String>,
    },
    /// Record with type code 4.
    Structured {
        /// The record's `key` string.
        key: String,
        /// Key/value pairs read from the record's `keys` / `vals` arrays.
        values: LinearMap<String, String>,
    },
    /// Record with type code 5: a plain `key` with a single `value` string.
    Generic { key: String, value: String },
}
266
/// Read-only view of a raw htslib `bcf_hdr_t`.
///
/// Cloning duplicates the underlying header with `bcf_hdr_dup`; dropping the
/// view releases it with `bcf_hdr_destroy`.
#[derive(Debug)]
pub struct HeaderView {
    /// Raw htslib header pointer owned by this view.
    pub inner: *mut htslib::bcf_hdr_t,
}
271
272impl HeaderView {
273 pub fn new(inner: *mut htslib::bcf_hdr_t) -> Self {
274 HeaderView { inner }
275 }
276
277 #[inline]
278 fn inner(&self) -> htslib::bcf_hdr_t {
279 unsafe { *self.inner }
280 }
281
282 pub fn sample_count(&self) -> u32 {
284 self.inner().n[htslib::BCF_DT_SAMPLE as usize] as u32
285 }
286
287 pub fn samples(&self) -> Vec<&[u8]> {
289 let names =
290 unsafe { slice::from_raw_parts(self.inner().samples, self.sample_count() as usize) };
291 names
292 .iter()
293 .map(|name| unsafe { ffi::CStr::from_ptr(*name).to_bytes() })
294 .collect()
295 }
296
297 pub fn sample_id(&self, sample: &[u8]) -> Option<usize> {
300 self.samples().iter().position(|s| *s == sample)
301 }
302
303 pub fn contig_count(&self) -> u32 {
305 self.inner().n[htslib::BCF_DT_CTG as usize] as u32
306 }
307
308 pub fn rid2name(&self, rid: u32) -> Result<&[u8]> {
309 if rid <= self.contig_count() {
310 unsafe {
311 let dict = self.inner().id[htslib::BCF_DT_CTG as usize];
312 let ptr = (*dict.offset(rid as isize)).key;
313 Ok(ffi::CStr::from_ptr(ptr).to_bytes())
314 }
315 } else {
316 Err(Error::BcfUnknownRID { rid })
317 }
318 }
319
320 pub fn name2rid(&self, name: &[u8]) -> Result<u32> {
340 let c_str = ffi::CString::new(name).unwrap();
341 unsafe {
342 match htslib::bcf_hdr_id2int(
343 self.inner,
344 htslib::BCF_DT_CTG as i32,
345 c_str.as_ptr() as *mut c_char,
346 ) {
347 -1 => Err(Error::BcfUnknownContig {
348 contig: str::from_utf8(name).unwrap().to_owned(),
349 }),
350 i => Ok(i as u32),
351 }
352 }
353 }
354
355 pub fn info_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
356 self.tag_type(tag, htslib::BCF_HL_INFO)
357 }
358
359 pub fn format_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
360 self.tag_type(tag, htslib::BCF_HL_FMT)
361 }
362
363 fn tag_type(&self, tag: &[u8], hdr_type: ::libc::c_uint) -> Result<(TagType, TagLength)> {
364 let tag_desc = || str::from_utf8(tag).unwrap().to_owned();
365 let c_str_tag = ffi::CString::new(tag).unwrap();
366 let (_type, length, num_values) = unsafe {
367 let id = htslib::bcf_hdr_id2int(
368 self.inner,
369 htslib::BCF_DT_ID as i32,
370 c_str_tag.as_ptr() as *mut c_char,
371 );
372 if id < 0 {
373 return Err(Error::BcfUndefinedTag { tag: tag_desc() });
374 }
375 let n = (*self.inner).n[htslib::BCF_DT_ID as usize] as usize;
376 let entry = slice::from_raw_parts((*self.inner).id[htslib::BCF_DT_ID as usize], n);
377 let d = (*entry[id as usize].val).info[hdr_type as usize];
378 ((d >> 4) & 0xf, (d >> 8) & 0xf, d >> 12)
379 };
380 let _type = match _type as ::libc::c_uint {
381 htslib::BCF_HT_FLAG => TagType::Flag,
382 htslib::BCF_HT_INT => TagType::Integer,
383 htslib::BCF_HT_REAL => TagType::Float,
384 htslib::BCF_HT_STR => TagType::String,
385 _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }),
386 };
387 let length = match length as ::libc::c_uint {
388 htslib::BCF_VL_FIXED => TagLength::Fixed(num_values as u32),
390 htslib::BCF_VL_VAR => TagLength::Variable,
391 htslib::BCF_VL_A => TagLength::AltAlleles,
392 htslib::BCF_VL_R => TagLength::Alleles,
393 htslib::BCF_VL_G => TagLength::Genotypes,
394 _ => return Err(Error::BcfUnexpectedType { tag: tag_desc() }),
395 };
396
397 Ok((_type, length))
398 }
399
400 pub fn name_to_id(&self, id: &[u8]) -> Result<Id> {
402 let c_str = ffi::CString::new(id).unwrap();
403 unsafe {
404 match htslib::bcf_hdr_id2int(
405 self.inner,
406 htslib::BCF_DT_ID as i32,
407 c_str.as_ptr() as *const c_char,
408 ) {
409 -1 => Err(Error::BcfUnknownID {
410 id: str::from_utf8(id).unwrap().to_owned(),
411 }),
412 i => Ok(Id(i as u32)),
413 }
414 }
415 }
416
417 pub fn id_to_name(&self, id: Id) -> Vec<u8> {
420 let key = unsafe {
421 ffi::CStr::from_ptr(
422 (*(*self.inner).id[htslib::BCF_DT_ID as usize].offset(*id as isize)).key,
423 )
424 };
425 key.to_bytes().to_vec()
426 }
427
428 pub fn sample_to_id(&self, id: &[u8]) -> Result<Id> {
430 let c_str = ffi::CString::new(id).unwrap();
431 unsafe {
432 match htslib::bcf_hdr_id2int(
433 self.inner,
434 htslib::BCF_DT_SAMPLE as i32,
435 c_str.as_ptr() as *const c_char,
436 ) {
437 -1 => Err(Error::BcfUnknownSample {
438 name: str::from_utf8(id).unwrap().to_owned(),
439 }),
440 i => Ok(Id(i as u32)),
441 }
442 }
443 }
444
445 pub fn id_to_sample(&self, id: Id) -> Vec<u8> {
447 let key = unsafe {
448 ffi::CStr::from_ptr(
449 (*(*self.inner).id[htslib::BCF_DT_SAMPLE as usize].offset(*id as isize)).key,
450 )
451 };
452 key.to_bytes().to_vec()
453 }
454
455 pub fn header_records(&self) -> Vec<HeaderRecord> {
457 fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap<String, String> {
458 let mut result: LinearMap<String, String> = LinearMap::new();
459 for i in 0_i32..(rec.nkeys) {
460 let key = unsafe {
461 ffi::CStr::from_ptr(*rec.keys.offset(i as isize))
462 .to_str()
463 .unwrap()
464 .to_string()
465 };
466 let value = unsafe {
467 ffi::CStr::from_ptr(*rec.vals.offset(i as isize))
468 .to_str()
469 .unwrap()
470 .to_string()
471 };
472 result.insert(key, value);
473 }
474 result
475 }
476
477 let mut result: Vec<HeaderRecord> = Vec::new();
478 for i in 0_i32..unsafe { (*self.inner).nhrec } {
479 let rec = unsafe { &(**(*self.inner).hrec.offset(i as isize)) };
480 let key = unsafe { ffi::CStr::from_ptr(rec.key).to_str().unwrap().to_string() };
481 let record = match rec.type_ {
482 0 => HeaderRecord::Filter {
483 key,
484 values: parse_kv(rec),
485 },
486 1 => HeaderRecord::Info {
487 key,
488 values: parse_kv(rec),
489 },
490 2 => HeaderRecord::Format {
491 key,
492 values: parse_kv(rec),
493 },
494 3 => HeaderRecord::Contig {
495 key,
496 values: parse_kv(rec),
497 },
498 4 => HeaderRecord::Structured {
499 key,
500 values: parse_kv(rec),
501 },
502 5 => HeaderRecord::Generic {
503 key,
504 value: unsafe { ffi::CStr::from_ptr(rec.value).to_str().unwrap().to_string() },
505 },
506 _ => panic!("Unknown type: {}", rec.type_),
507 };
508 result.push(record);
509 }
510 result
511 }
512
513 pub fn empty_record(&self) -> crate::bcf::Record {
517 crate::bcf::Record::new(Rc::new(self.clone()))
518 }
519}
520
521impl Clone for HeaderView {
522 fn clone(&self) -> Self {
523 HeaderView {
524 inner: unsafe { htslib::bcf_hdr_dup(self.inner) },
525 }
526 }
527}
528
529impl Drop for HeaderView {
530 fn drop(&mut self) {
531 unsafe {
532 htslib::bcf_hdr_destroy(self.inner);
533 }
534 }
535}
536
/// Value type of an INFO or FORMAT tag, as decoded by
/// `HeaderView::info_type` / `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagType {
    /// Decoded from `BCF_HT_FLAG`.
    Flag,
    /// Decoded from `BCF_HT_INT`.
    Integer,
    /// Decoded from `BCF_HT_REAL`.
    Float,
    /// Decoded from `BCF_HT_STR`.
    String,
}
544
/// Length class of an INFO or FORMAT tag, as decoded by
/// `HeaderView::info_type` / `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagLength {
    /// Decoded from `BCF_VL_FIXED`; carries the number of values stored in
    /// the header entry.
    Fixed(u32),
    /// Decoded from `BCF_VL_A`.
    AltAlleles,
    /// Decoded from `BCF_VL_R`.
    Alleles,
    /// Decoded from `BCF_VL_G`.
    Genotypes,
    /// Decoded from `BCF_VL_VAR`.
    Variable,
}
553
#[cfg(test)]
mod tests {
    use crate::bcf::Reader;

    /// A record created from a header view starts out with the expected
    /// default `rid`, `pos` and `qual` values.
    #[test]
    fn test_header_view_empty_record() {
        let reader = Reader::from_path("test/test_string.vcf").expect("Error opening file");
        let view = reader.header.clone();

        let empty = view.empty_record();
        eprintln!("{:?}", empty.rid());

        // One assertion per line so a failure points at a single field.
        assert_eq!(empty.rid(), Some(0));
        assert_eq!(empty.pos(), 0);
        assert_eq!(empty.qual(), 0.0);
    }
}