rust_htslib/bcf/record.rs
1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::{Borrow, BorrowMut};
7use std::fmt;
8use std::marker::PhantomData;
9use std::ops::Deref;
10use std::os::raw::c_char;
11use std::ptr;
12use std::slice;
13use std::str;
14use std::sync::Arc;
15use std::{ffi, iter};
16
17use bio_types::genome;
18use derive_new::new;
19use ieee754::Ieee754;
20use lazy_static::lazy_static;
21
22use crate::bcf::header::{HeaderView, Id};
23use crate::bcf::Error;
24use crate::errors::Result;
25use crate::htslib;
26
// Sentinel integer stored in place of a missing value (compared against in
// `Numeric for i32`).
const MISSING_INTEGER: i32 = i32::MIN;
// Sentinel integer marking the end of a vector (compared against in
// `NumericUtils for i32`).
const VECTOR_END_INTEGER: i32 = i32::MIN + 1;

lazy_static! {
    // Float sentinels are defined by their exact bit patterns. Both patterns are
    // NaNs (exponent all ones, non-zero mantissa), so they can only be recognised
    // by comparing bits, never with `==` on the float values.
    // NOTE(review): presumably these mirror htslib's float missing / vector-end
    // markers — confirm against htslib's vcf.h.
    static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F80_0001);
    static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F80_0002);
}
34
/// Behaviour shared by the numeric value types stored in INFO and FORMAT entries.
pub trait Numeric {
    /// Report whether this entry holds the "missing" sentinel.
    fn is_missing(&self) -> bool;

    /// Produce the sentinel that represents a missing value inside a BCF record.
    fn missing() -> Self;
}
43
44impl Numeric for f32 {
45 fn is_missing(&self) -> bool {
46 self.bits() == MISSING_FLOAT.bits()
47 }
48
49 fn missing() -> f32 {
50 *MISSING_FLOAT
51 }
52}
53
54impl Numeric for i32 {
55 fn is_missing(&self) -> bool {
56 *self == MISSING_INTEGER
57 }
58
59 fn missing() -> i32 {
60 MISSING_INTEGER
61 }
62}
63
/// Crate-private helpers for numeric entries.
trait NumericUtils {
    /// Report whether this entry is the sentinel that terminates a vector of values.
    fn is_vector_end(&self) -> bool;
}
68
69impl NumericUtils for f32 {
70 fn is_vector_end(&self) -> bool {
71 self.bits() == VECTOR_END_FLOAT.bits()
72 }
73}
74
75impl NumericUtils for i32 {
76 fn is_vector_end(&self) -> bool {
77 *self == VECTOR_END_INTEGER
78 }
79}
80
81/// A trait to allow for seamless use of bytes or integer identifiers for filters
82pub trait FilterId {
83 fn id_from_header(&self, header: &HeaderView) -> Result<Id>;
84 fn is_pass(&self) -> bool;
85}
86
87impl FilterId for [u8] {
88 fn id_from_header(&self, header: &HeaderView) -> Result<Id> {
89 header.name_to_id(self)
90 }
91 fn is_pass(&self) -> bool {
92 matches!(self, b"PASS" | b".")
93 }
94}
95
96impl FilterId for Id {
97 fn id_from_header(&self, _header: &HeaderView) -> Result<Id> {
98 Ok(*self)
99 }
100 fn is_pass(&self) -> bool {
101 *self == Id(0)
102 }
103}
104
/// A buffer for info or format data.
///
/// The wrapped pointer is released with `libc::free` when the buffer is dropped.
#[derive(Debug)]
pub struct Buffer {
    // Raw allocation; starts out null in `Buffer::new`.
    inner: *mut ::std::os::raw::c_void,
    // Starts at 0 in `Buffer::new`.
    // NOTE(review): presumably the size htslib reports for `inner` — confirm
    // against the code that fills the buffer.
    len: i32,
}
111
112impl Buffer {
113 pub fn new() -> Self {
114 Buffer {
115 inner: ptr::null_mut(),
116 len: 0,
117 }
118 }
119}
120
121impl Default for Buffer {
122 fn default() -> Self {
123 Self::new()
124 }
125}
126
127impl Drop for Buffer {
128 fn drop(&mut self) {
129 unsafe {
130 ::libc::free(self.inner);
131 }
132 }
133}
134
/// A value bundled together with the buffer handle `B` it was produced with.
///
/// Dereferences to the wrapped value.
#[derive(new, Debug)]
pub struct BufferBacked<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> {
    // The value handed out through `Deref`.
    value: T,
    // Never read in the visible code.
    // NOTE(review): presumably held so the memory behind `value` stays allocated
    // for as long as this wrapper lives — confirm.
    _buffer: B,
    // Ties the `'a` lifetime to `B` without storing a reference.
    #[new(default)]
    phantom: PhantomData<&'a B>,
}
142
143impl<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> Deref for BufferBacked<'a, T, B> {
144 type Target = T;
145
146 fn deref(&self) -> &T {
147 &self.value
148 }
149}
150
151impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow<Buffer> + 'a> fmt::Display
152 for BufferBacked<'a, T, B>
153{
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 fmt::Display::fmt(&self.value, f)
156 }
157}
158
/// A VCF/BCF record.
/// New records can be created by the `empty_record` methods of [`bcf::Reader`](crate::bcf::Reader)
/// and [`bcf::Writer`](crate::bcf::Writer).
/// # Example
/// ```rust
/// use rust_htslib::bcf::{Format, Writer};
/// use rust_htslib::bcf::header::Header;
///
/// // Create minimal VCF header with a single sample
/// let mut header = Header::new();
/// header.push_sample("sample".as_bytes());
///
/// // Write uncompressed VCF to stdout with above header and get an empty record
/// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
/// let mut record = vcf.empty_record();
/// ```
#[derive(Debug)]
pub struct Record {
    /// Raw pointer to the underlying htslib record; `Record::new` obtains it from
    /// `bcf_init`.
    pub inner: *mut htslib::bcf1_t,
    // Shared header the record is interpreted against; replaced by `set_header`.
    header: Arc<HeaderView>,
}
180
181impl Record {
182 /// Construct record with reference to header `HeaderView`, for create-internal use.
183 pub fn new(header: Arc<HeaderView>) -> Self {
184 let inner = unsafe {
185 let inner = htslib::bcf_init();
186 // Always unpack record.
187 htslib::bcf_unpack(inner, htslib::BCF_UN_ALL as i32);
188 inner
189 };
190 Record { inner, header }
191 }
192
193 /// Force unpacking of internal record values.
194 pub fn unpack(&mut self) {
195 unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
196 }
197
198 /// Return associated header.
199 pub fn header(&self) -> &HeaderView {
200 self.header.as_ref()
201 }
202
203 /// Translate the record to the given header.
204 pub fn translate(&mut self, dst_header: &mut Arc<HeaderView>) -> Result<()> {
205 if unsafe { htslib::bcf_translate(dst_header.inner, self.header().inner, self.inner) } == 0
206 {
207 self.set_header(Arc::clone(dst_header));
208 Ok(())
209 } else {
210 Err(Error::BcfTranslate)
211 }
212 }
213
214 /// Set the record header.
215 pub(crate) fn set_header(&mut self, header: Arc<HeaderView>) {
216 self.header = header;
217 }
218
219 /// Return reference to the inner C struct.
220 ///
221 /// # Remarks
222 ///
223 /// Note that this function is only required as long as Rust-Htslib does not provide full
224 /// access to all aspects of Htslib.
225 pub fn inner(&self) -> &htslib::bcf1_t {
226 unsafe { &*self.inner }
227 }
228
229 /// Return mutable reference to inner C struct.
230 ///
231 /// # Remarks
232 ///
233 /// Note that this function is only required as long as Rust-Htslib does not provide full
234 /// access to all aspects of Htslib.
235 pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
236 unsafe { &mut *self.inner }
237 }
238
239 /// Get the reference id of the record.
240 ///
241 /// To look up the contig name,
242 /// use [`HeaderView::rid2name`](../header/struct.HeaderView.html#method.rid2name).
243 ///
244 /// # Returns
245 ///
246 /// - `Some(rid)` if the internal `rid` is set to a value that is not `-1`
247 /// - `None` if the internal `rid` is set to `-1`
248 pub fn rid(&self) -> Option<u32> {
249 match self.inner().rid {
250 -1 => None,
251 rid => Some(rid as u32),
252 }
253 }
254
255 /// Update the reference id of the record.
256 ///
257 /// To look up reference id for a contig name,
258 /// use [`HeaderView::name2rid`](../header/struct.HeaderView.html#method.name2rid).
259 ///
260 /// # Example
261 ///
262 /// Example assumes we have a Record `record` from a VCF with a header containing region
263 /// named `1`. See [module documentation](../index.html#example-writing) for how to set
264 /// up VCF, header, and record.
265 ///
266 /// ```
267 /// # use rust_htslib::bcf::{Format, Writer};
268 /// # use rust_htslib::bcf::header::Header;
269 /// # let mut header = Header::new();
270 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
271 /// # header.push_record(header_contig_line.as_bytes());
272 /// # header.push_sample("test_sample".as_bytes());
273 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
274 /// # let mut record = vcf.empty_record();
275 /// let rid = record.header().name2rid(b"1").ok();
276 /// record.set_rid(rid);
277 /// assert_eq!(record.rid(), rid);
278 /// let name = record.header().rid2name(record.rid().unwrap()).ok();
279 /// assert_eq!(Some("1".as_bytes()), name);
280 /// ```
281 pub fn set_rid(&mut self, rid: Option<u32>) {
282 match rid {
283 Some(rid) => self.inner_mut().rid = rid as i32,
284 None => self.inner_mut().rid = -1,
285 }
286 }
287
288 /// Return **0-based** position
289 pub fn pos(&self) -> i64 {
290 self.inner().pos
291 }
292
293 /// Set **0-based** position
294 pub fn set_pos(&mut self, pos: i64) {
295 self.inner_mut().pos = pos;
296 }
297
298 /// Return the **0-based, exclusive** end position
299 ///
300 /// # Example
301 /// ```rust
302 /// # use rust_htslib::bcf::{Format, Header, Writer};
303 /// # use tempfile::NamedTempFile;
304 /// # let tmp = NamedTempFile::new().unwrap();
305 /// # let path = tmp.path();
306 /// # let header = Header::new();
307 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
308 /// # let mut record = vcf.empty_record();
309 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
310 /// record.set_alleles(alleles).expect("Failed to set alleles");
311 /// record.set_pos(5);
312 ///
313 /// assert_eq!(record.end(), 8)
314 /// ```
315 pub fn end(&self) -> i64 {
316 self.pos() + self.rlen()
317 }
318
319 /// Return the value of the ID column.
320 ///
321 /// When empty, returns `b".".to_vec()`.
322 pub fn id(&self) -> Vec<u8> {
323 if self.inner().d.id.is_null() {
324 b".".to_vec()
325 } else {
326 let id = unsafe { ffi::CStr::from_ptr(self.inner().d.id) };
327 id.to_bytes().to_vec()
328 }
329 }
330
331 /// Update the ID string to the given value.
332 pub fn set_id(&mut self, id: &[u8]) -> Result<()> {
333 let c_str = ffi::CString::new(id).unwrap();
334 if unsafe {
335 htslib::bcf_update_id(
336 self.header().inner,
337 self.inner,
338 c_str.as_ptr() as *mut c_char,
339 )
340 } == 0
341 {
342 Ok(())
343 } else {
344 Err(Error::BcfSetValues)
345 }
346 }
347
348 /// Clear the ID column (set it to `"."`).
349 pub fn clear_id(&mut self) -> Result<()> {
350 let c_str = ffi::CString::new(&b"."[..]).unwrap();
351 if unsafe {
352 htslib::bcf_update_id(
353 self.header().inner,
354 self.inner,
355 c_str.as_ptr() as *mut c_char,
356 )
357 } == 0
358 {
359 Ok(())
360 } else {
361 Err(Error::BcfSetValues)
362 }
363 }
364
365 /// Add the ID string (the ID field is semicolon-separated), checking for duplicates.
366 pub fn push_id(&mut self, id: &[u8]) -> Result<()> {
367 let c_str = ffi::CString::new(id).unwrap();
368 if unsafe {
369 htslib::bcf_add_id(
370 self.header().inner,
371 self.inner,
372 c_str.as_ptr() as *mut c_char,
373 )
374 } == 0
375 {
376 Ok(())
377 } else {
378 Err(Error::BcfSetValues)
379 }
380 }
381
382 /// Return `Filters` iterator for enumerating all filters that have been set.
383 ///
384 /// A record having the `PASS` filter will return an empty `Filter` here.
385 pub fn filters(&self) -> Filters<'_> {
386 Filters::new(self)
387 }
388
389 /// Query whether the filter with the given ID has been set.
390 ///
391 /// This method can be used to check if a record passes filtering by using either `Id(0)`,
392 /// `PASS` or `.`
393 ///
394 /// # Example
395 /// ```rust
396 /// # use rust_htslib::bcf::{Format, Header, Writer};
397 /// # use rust_htslib::bcf::header::Id;
398 /// # use tempfile::NamedTempFile;
399 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
400 /// # let path = tmp.path();
401 /// let mut header = Header::new();
402 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
403 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
404 /// # let mut record = vcf.empty_record();
405 /// assert!(record.has_filter("PASS".as_bytes()));
406 /// assert!(record.has_filter(".".as_bytes()));
407 /// assert!(record.has_filter(&Id(0)));
408 ///
409 /// record.push_filter("foo".as_bytes()).unwrap();
410 /// assert!(record.has_filter("foo".as_bytes()));
411 /// assert!(!record.has_filter("PASS".as_bytes()))
412 /// ```
413 pub fn has_filter<T: FilterId + ?Sized>(&self, flt_id: &T) -> bool {
414 if flt_id.is_pass() && self.inner().d.n_flt == 0 {
415 return true;
416 }
417 let id = match flt_id.id_from_header(self.header()) {
418 Ok(i) => *i,
419 Err(_) => return false,
420 };
421 for i in 0..(self.inner().d.n_flt as isize) {
422 if unsafe { *self.inner().d.flt.offset(i) } == id as i32 {
423 return true;
424 }
425 }
426 false
427 }
428
429 /// Set the given filter IDs to the FILTER column.
430 ///
431 /// Setting an empty slice removes all filters and sets `PASS`.
432 ///
433 /// # Example
434 /// ```rust
435 /// # use rust_htslib::bcf::{Format, Header, Writer};
436 /// # use rust_htslib::bcf::header::Id;
437 /// # use tempfile::NamedTempFile;
438 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
439 /// # let path = tmp.path();
440 /// let mut header = Header::new();
441 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
442 /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
443 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
444 /// # let mut record = vcf.empty_record();
445 /// let foo = record.header().name_to_id(b"foo").unwrap();
446 /// let bar = record.header().name_to_id(b"bar").unwrap();
447 /// assert!(record.has_filter("PASS".as_bytes()));
448 /// let mut filters = vec![&foo, &bar];
449 /// record.set_filters(&filters).unwrap();
450 /// assert!(record.has_filter(&foo));
451 /// assert!(record.has_filter(&bar));
452 /// assert!(!record.has_filter("PASS".as_bytes()));
453 /// filters.clear();
454 /// record.set_filters(&filters).unwrap();
455 /// assert!(record.has_filter("PASS".as_bytes()));
456 /// assert!(!record.has_filter("foo".as_bytes()));
457 /// // 'baz' isn't in the header
458 /// assert!(record.set_filters(&["baz".as_bytes()]).is_err())
459 /// ```
460 ///
461 /// # Errors
462 /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
463 ///
464 pub fn set_filters<T: FilterId + ?Sized>(&mut self, flt_ids: &[&T]) -> Result<()> {
465 let mut ids: Vec<i32> = flt_ids
466 .iter()
467 .map(|id| id.id_from_header(self.header()).map(|id| *id as i32))
468 .collect::<Result<Vec<i32>>>()?;
469 unsafe {
470 htslib::bcf_update_filter(
471 self.header().inner,
472 self.inner,
473 ids.as_mut_ptr(),
474 ids.len() as i32,
475 );
476 };
477 Ok(())
478 }
479
480 /// Add the given filter to the FILTER column.
481 ///
482 /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
483 /// any existing `PASS` filter is removed.
484 ///
485 /// # Example
486 /// ```rust
487 /// # use rust_htslib::bcf::{Format, Header, Writer};
488 /// # use tempfile::NamedTempFile;
489 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
490 /// # let path = tmp.path();
491 /// let mut header = Header::new();
492 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
493 /// header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
494 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
495 /// # let mut record = vcf.empty_record();
496 /// let foo = "foo".as_bytes();
497 /// let bar = record.header().name_to_id(b"bar").unwrap();
498 /// assert!(record.has_filter("PASS".as_bytes()));
499 ///
500 /// record.push_filter(foo).unwrap();
501 /// record.push_filter(&bar).unwrap();
502 /// assert!(record.has_filter(foo));
503 /// assert!(record.has_filter(&bar));
504 /// // filter must exist in the header
505 /// assert!(record.push_filter("baz".as_bytes()).is_err())
506 /// ```
507 ///
508 /// # Errors
509 /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
510 ///
511 pub fn push_filter<T: FilterId + ?Sized>(&mut self, flt_id: &T) -> Result<()> {
512 let id = flt_id.id_from_header(self.header())?;
513 unsafe {
514 htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
515 };
516 Ok(())
517 }
518
519 /// Remove the given filter from the FILTER column.
520 ///
521 /// # Arguments
522 ///
523 /// - `flt_id` - The corresponding filter ID to remove.
524 /// - `pass_on_empty` - Set to `PASS` when removing the last filter.
525 ///
526 /// # Example
527 /// ```rust
528 /// # use rust_htslib::bcf::{Format, Header, Writer};
529 /// # use tempfile::NamedTempFile;
530 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
531 /// # let path = tmp.path();
532 /// let mut header = Header::new();
533 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
534 /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
535 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
536 /// # let mut record = vcf.empty_record();
537 /// let foo = "foo".as_bytes();
538 /// let bar = "bar".as_bytes();
539 /// record.set_filters(&[foo, bar]).unwrap();
540 /// assert!(record.has_filter(foo));
541 /// assert!(record.has_filter(bar));
542 ///
543 /// record.remove_filter(foo, true).unwrap();
544 /// assert!(!record.has_filter(foo));
545 /// assert!(record.has_filter(bar));
546 /// // 'baz' is not in the header
547 /// assert!(record.remove_filter("baz".as_bytes(), true).is_err());
548 ///
549 /// record.remove_filter(bar, true).unwrap();
550 /// assert!(!record.has_filter(bar));
551 /// assert!(record.has_filter("PASS".as_bytes()));
552 /// ```
553 ///
554 /// # Errors
555 /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
556 ///
557 pub fn remove_filter<T: FilterId + ?Sized>(
558 &mut self,
559 flt_id: &T,
560 pass_on_empty: bool,
561 ) -> Result<()> {
562 let id = flt_id.id_from_header(self.header())?;
563 unsafe {
564 htslib::bcf_remove_filter(
565 self.header().inner,
566 self.inner,
567 *id as i32,
568 pass_on_empty as i32,
569 )
570 };
571 Ok(())
572 }
573
574 /// Get alleles strings.
575 ///
576 /// The first allele is the reference allele.
577 pub fn alleles(&self) -> Vec<&[u8]> {
578 unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
579 let n = self.inner().n_allele() as usize;
580 let dec = self.inner().d;
581 let alleles = unsafe { slice::from_raw_parts(dec.allele, n) };
582 (0..n)
583 .map(|i| unsafe { ffi::CStr::from_ptr(alleles[i]).to_bytes() })
584 .collect()
585 }
586
587 /// Set alleles. The first allele is the reference allele.
588 ///
589 /// # Example
590 /// ```rust
591 /// # use rust_htslib::bcf::{Format, Writer};
592 /// # use rust_htslib::bcf::header::Header;
593 /// #
594 /// # // Create minimal VCF header with a single sample
595 /// # let mut header = Header::new();
596 /// # header.push_sample("sample".as_bytes());
597 /// #
598 /// # // Write uncompressed VCF to stdout with above header and get an empty record
599 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
600 /// # let mut record = vcf.empty_record();
601 /// assert_eq!(record.allele_count(), 0);
602 ///
603 /// let alleles: &[&[u8]] = &[b"A", b"TG"];
604 /// record.set_alleles(alleles).expect("Failed to set alleles");
605 /// assert_eq!(record.allele_count(), 2)
606 /// ```
607 pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<()> {
608 let cstrings: Vec<ffi::CString> = alleles
609 .iter()
610 .map(|vec| ffi::CString::new(*vec).unwrap())
611 .collect();
612 let mut ptrs: Vec<*const c_char> = cstrings
613 .iter()
614 .map(|cstr| cstr.as_ptr() as *const c_char)
615 .collect();
616 if unsafe {
617 htslib::bcf_update_alleles(
618 self.header().inner,
619 self.inner,
620 ptrs.as_mut_ptr(),
621 alleles.len() as i32,
622 )
623 } == 0
624 {
625 Ok(())
626 } else {
627 Err(Error::BcfSetValues)
628 }
629 }
630
631 /// Get variant quality.
632 pub fn qual(&self) -> f32 {
633 self.inner().qual
634 }
635
636 /// Set variant quality.
637 pub fn set_qual(&mut self, qual: f32) {
638 self.inner_mut().qual = qual;
639 }
640
641 pub fn info<'a>(&'a self, tag: &'a [u8]) -> Info<'a, Buffer> {
642 self.info_shared_buffer(tag, Buffer::new())
643 }
644
645 /// Get the value of the given info tag.
646 pub fn info_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
647 &'a self,
648 tag: &'a [u8],
649 buffer: B,
650 ) -> Info<'a, B> {
651 Info {
652 record: self,
653 tag,
654 buffer,
655 }
656 }
657
658 /// Get the number of samples in the record.
659 pub fn sample_count(&self) -> u32 {
660 self.inner().n_sample()
661 }
662
663 /// Get the number of alleles, including reference allele.
664 pub fn allele_count(&self) -> u32 {
665 self.inner().n_allele()
666 }
667
668 /// Add/replace genotypes in FORMAT GT tag.
669 ///
670 /// # Arguments
671 ///
672 /// - `genotypes` - a flattened, two-dimensional array of GenotypeAllele,
673 /// the first dimension contains one array for each sample.
674 ///
675 /// # Errors
676 ///
677 /// Returns error if GT tag is not present in header.
678 ///
679 /// # Example
680 ///
681 /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag.
682 /// See [module documentation](../index.html#example-writing) for how to set up
683 /// VCF, header, and record.
684 ///
685 /// ```
686 /// # use rust_htslib::bcf::{Format, Writer};
687 /// # use rust_htslib::bcf::header::Header;
688 /// # use rust_htslib::bcf::record::GenotypeAllele;
689 /// # let mut header = Header::new();
690 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
691 /// # header.push_record(header_contig_line.as_bytes());
692 /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
693 /// # header.push_record(header_gt_line.as_bytes());
694 /// # header.push_sample("test_sample".as_bytes());
695 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
696 /// # let mut record = vcf.empty_record();
697 /// let alleles = &[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)];
698 /// record.push_genotypes(alleles);
699 /// assert_eq!("1/1", &format!("{}", record.genotypes().unwrap().get(0)));
700 /// ```
701 pub fn push_genotypes(&mut self, genotypes: &[GenotypeAllele]) -> Result<()> {
702 let encoded: Vec<i32> = genotypes.iter().map(|gt| i32::from(*gt)).collect();
703 self.push_format_integer(b"GT", &encoded)
704 }
705
706 /// Add/replace genotypes in FORMAT GT tag by providing a list of genotypes.
707 ///
708 /// # Arguments
709 ///
710 /// - `genotypes` - a two-dimensional array of GenotypeAllele
711 /// - `max_ploidy` - the maximum number of alleles allowed for any genotype on any sample.
712 ///
713 /// # Errors
714 ///
715 /// Returns an error if any genotype has more allelles than `max_ploidy` or if the GT tag is not present in the header.
716 ///
717 /// # Example
718 ///
719 /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag and three samples.
720 /// See [module documentation](../index.html#example-writing) for how to set up
721 /// VCF, header, and record.
722 ///
723 /// ```
724 /// # use rust_htslib::bcf::{Format, Writer};
725 /// # use rust_htslib::bcf::header::Header;
726 /// # use rust_htslib::bcf::record::GenotypeAllele;
727 /// # use std::iter;
728 /// # let mut header = Header::new();
729 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
730 /// # header.push_record(header_contig_line.as_bytes());
731 /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
732 /// # header.push_record(header_gt_line.as_bytes());
733 /// # header.push_sample("first_sample".as_bytes());
734 /// # header.push_sample("second_sample".as_bytes());
735 /// # header.push_sample("third_sample".as_bytes());
736 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf)?;
737 /// # let mut record = vcf.empty_record();
738 /// let alleles = vec![
739 /// vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
740 /// vec![GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)],
741 /// vec![GenotypeAllele::Unphased(0)],
742 /// ];
743 /// record.push_genotype_structured(&alleles, 2);
744 /// let gts = record.genotypes()?;
745 /// assert_eq!("1/1", &format!("{}", gts.get(0)));
746 /// assert_eq!("0|1", &format!("{}", gts.get(1)));
747 /// assert_eq!("0", &format!("{}", gts.get(2)));
748 /// # Ok::<(), rust_htslib::errors::Error>(())
749 /// ```
750 pub fn push_genotype_structured<GT>(
751 &mut self,
752 genotypes: &[GT],
753 max_ploidy: usize,
754 ) -> Result<()>
755 where
756 GT: AsRef<[GenotypeAllele]>,
757 {
758 let mut data = Vec::with_capacity(max_ploidy * genotypes.len());
759 for gt in genotypes {
760 if gt.as_ref().len() > max_ploidy {
761 return Err(Error::BcfSetValues);
762 }
763 data.extend(
764 gt.as_ref()
765 .iter()
766 .map(|gta| i32::from(*gta))
767 .chain(iter::repeat_n(
768 VECTOR_END_INTEGER,
769 max_ploidy - gt.as_ref().len(),
770 )),
771 );
772 }
773 self.push_format_integer(b"GT", &data)
774 }
775
776 /// Get genotypes as vector of one `Genotype` per sample.
777 ///
778 /// # Example
779 /// Parsing genotype field (`GT` tag) from a VCF record:
780 /// ```
781 /// use crate::rust_htslib::bcf::{Reader, Read};
782 /// let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
783 /// let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
784 /// for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
785 /// let mut rec = rec.expect("Error reading record.");
786 /// let genotypes = rec.genotypes().expect("Error reading genotypes");
787 /// assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
788 /// }
789 /// ```
790 pub fn genotypes(&self) -> Result<Genotypes<'_, Buffer>> {
791 self.genotypes_shared_buffer(Buffer::new())
792 }
793
794 /// Get genotypes as vector of one `Genotype` per sample, using a given shared buffer
795 /// to avoid unnecessary allocations.
796 pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result<Genotypes<'a, B>>
797 where
798 B: BorrowMut<Buffer> + Borrow<Buffer> + 'a,
799 {
800 Ok(Genotypes {
801 encoded: self.format_shared_buffer(b"GT", buffer).integer()?,
802 })
803 }
804
805 /// Retrieve data for a `FORMAT` field
806 ///
807 /// # Example
808 /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
809 /// for an example of the setup used here.*
810 ///
811 /// ```rust
812 /// # use rust_htslib::bcf::{Format, Writer};
813 /// # use rust_htslib::bcf::header::Header;
814 /// #
815 /// # // Create minimal VCF header with a single sample
816 /// # let mut header = Header::new();
817 /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
818 /// #
819 /// # // Write uncompressed VCF to stdout with above header and get an empty record
820 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
821 /// # let mut record = vcf.empty_record();
822 /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
823 ///
824 /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
825 /// let sample1_depth = read_depths[0];
826 /// assert_eq!(sample1_depth, &[20]);
827 /// let sample2_depth = read_depths[1];
828 /// assert_eq!(sample2_depth, &[12])
829 /// ```
830 ///
831 /// # Errors
832 /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
833 /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
834 /// accessed. If parts of the data are accessed after the `BufferBacked` object is been
835 /// dropped, you will access unallocated memory.
836 pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
837 self.format_shared_buffer(tag, Buffer::new())
838 }
839
840 /// Get the value of the given format tag for each sample.
841 pub fn format_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
842 &'a self,
843 tag: &'a [u8],
844 buffer: B,
845 ) -> Format<'a, B> {
846 Format::new(self, tag, buffer)
847 }
848
849 /// Add/replace an integer-typed FORMAT tag.
850 ///
851 /// # Arguments
852 ///
853 /// - `tag` - The tag's string.
854 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
855 /// for each sample.
856 ///
857 /// # Errors
858 ///
859 /// Returns error if tag is not present in header.
860 pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
861 self.push_format(tag, data, htslib::BCF_HT_INT)
862 }
863
864 /// Add/replace a float-typed FORMAT tag.
865 ///
866 /// # Arguments
867 ///
868 /// - `tag` - The tag's string.
869 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
870 /// for each sample.
871 ///
872 /// # Errors
873 ///
874 /// Returns error if tag is not present in header.
875 ///
876 /// # Example
877 ///
878 /// Example assumes we have a Record `record` from a VCF with an `AF` `FORMAT` tag.
879 /// See [module documentation](../index.html#example-writing) for how to set up
880 /// VCF, header, and record.
881 ///
882 /// ```
883 /// # use rust_htslib::bcf::{Format, Writer};
884 /// # use rust_htslib::bcf::header::Header;
885 /// # use rust_htslib::bcf::record::GenotypeAllele;
886 /// # let mut header = Header::new();
887 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
888 /// # header.push_record(header_contig_line.as_bytes());
889 /// # let header_af_line = r#"##FORMAT=<ID=AF,Number=1,Type=Float,Description="Frequency">"#;
890 /// # header.push_record(header_af_line.as_bytes());
891 /// # header.push_sample("test_sample".as_bytes());
892 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
893 /// # let mut record = vcf.empty_record();
894 /// record.push_format_float(b"AF", &[0.5]);
895 /// assert_eq!(0.5, record.format(b"AF").float().unwrap()[0][0]);
896 /// ```
897 pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
898 self.push_format(tag, data, htslib::BCF_HT_REAL)
899 }
900
901 /// Add/replace a single-char-typed FORMAT tag.
902 ///
903 /// # Arguments
904 ///
905 /// - `tag` - The tag's string.
906 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
907 /// for each sample.
908 ///
909 /// # Errors
910 ///
911 /// Returns error if tag is not present in header.
912 pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<()> {
913 self.push_format(tag, data, htslib::BCF_HT_STR)
914 }
915
916 /// Add a format tag. Data is a flattened two-dimensional array.
917 /// The first dimension contains one array for each sample.
918 fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
919 let tag_c_str = ffi::CString::new(tag).unwrap();
920 unsafe {
921 if htslib::bcf_update_format(
922 self.header().inner,
923 self.inner,
924 tag_c_str.as_ptr() as *mut c_char,
925 data.as_ptr() as *const ::std::os::raw::c_void,
926 data.len() as i32,
927 ht as i32,
928 ) == 0
929 {
930 Ok(())
931 } else {
932 Err(Error::BcfSetTag {
933 tag: str::from_utf8(tag).unwrap().to_owned(),
934 })
935 }
936 }
937 }
938
939 // TODO: should we add convenience methods clear_format_*?
940
941 /// Add a string-typed FORMAT tag. Note that genotypes are treated as a special case
942 /// and cannot be added with this method. See instead [push_genotypes](#method.push_genotypes).
943 ///
944 /// # Arguments
945 ///
946 /// - `tag` - The tag's string.
947 /// - `data` - a two-dimensional array, the first dimension contains one array
948 /// for each sample. Must be non-empty.
949 ///
950 /// # Errors
951 ///
952 /// Returns error if tag is not present in header.
953 pub fn push_format_string<D: Borrow<[u8]>>(&mut self, tag: &[u8], data: &[D]) -> Result<()> {
954 assert!(
955 !data.is_empty(),
956 "given string data must have at least 1 element"
957 );
958 let c_data = data
959 .iter()
960 .map(|s| ffi::CString::new(s.borrow()).unwrap())
961 .collect::<Vec<ffi::CString>>();
962 let c_ptrs = c_data
963 .iter()
964 .map(|s| s.as_ptr() as *mut i8)
965 .collect::<Vec<*mut i8>>();
966 let tag_c_str = ffi::CString::new(tag).unwrap();
967 unsafe {
968 if htslib::bcf_update_format_string(
969 self.header().inner,
970 self.inner,
971 tag_c_str.as_ptr() as *mut c_char,
972 c_ptrs.as_slice().as_ptr() as *mut *const c_char,
973 data.len() as i32,
974 ) == 0
975 {
976 Ok(())
977 } else {
978 Err(Error::BcfSetTag {
979 tag: str::from_utf8(tag).unwrap().to_owned(),
980 })
981 }
982 }
983 }
984
985 /// Add/replace an integer-typed INFO entry.
986 pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
987 self.push_info(tag, data, htslib::BCF_HT_INT)
988 }
989
990 /// Remove the integer-typed INFO entry.
991 pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<()> {
992 self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
993 }
994
995 /// Add/replace a float-typed INFO entry.
996 pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
997 self.push_info(tag, data, htslib::BCF_HT_REAL)
998 }
999
1000 /// Remove the float-typed INFO entry.
1001 pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<()> {
1002 self.push_info::<u8>(tag, &[], htslib::BCF_HT_REAL)
1003 }
1004
1005 /// Add/replace an INFO tag.
1006 ///
1007 /// # Arguments
1008 /// * `tag` - the tag to add/replace
1009 /// * `data` - the data to set
1010 /// * `ht` - the HTSLib type to use
1011 fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
1012 let tag_c_str = ffi::CString::new(tag).unwrap();
1013 unsafe {
1014 if htslib::bcf_update_info(
1015 self.header().inner,
1016 self.inner,
1017 tag_c_str.as_ptr() as *mut c_char,
1018 data.as_ptr() as *const ::std::os::raw::c_void,
1019 data.len() as i32,
1020 ht as i32,
1021 ) == 0
1022 {
1023 Ok(())
1024 } else {
1025 Err(Error::BcfSetTag {
1026 tag: str::from_utf8(tag).unwrap().to_owned(),
1027 })
1028 }
1029 }
1030 }
1031
1032 /// Set flag into the INFO column.
1033 pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1034 self.push_info_string_impl(tag, &[b""], htslib::BCF_HT_FLAG)
1035 }
1036
1037 /// Remove the flag from the INFO column.
1038 pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1039 self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
1040 }
1041
1042 /// Add/replace a string-typed INFO entry.
1043 pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<()> {
1044 self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
1045 }
1046
1047 /// Remove the string field from the INFO column.
1048 pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<()> {
1049 self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
1050 }
1051
1052 /// Add an string-valued INFO tag.
1053 fn push_info_string_impl(&mut self, tag: &[u8], data: &[&[u8]], ht: u32) -> Result<()> {
1054 let mut buf: Vec<u8> = Vec::new();
1055 for (i, &s) in data.iter().enumerate() {
1056 if i > 0 {
1057 buf.extend(b",");
1058 }
1059 buf.extend(s);
1060 }
1061 let c_str = ffi::CString::new(buf).unwrap();
1062 let len = if ht == htslib::BCF_HT_FLAG {
1063 data.len()
1064 } else {
1065 c_str.to_bytes().len()
1066 };
1067 let tag_c_str = ffi::CString::new(tag).unwrap();
1068 unsafe {
1069 if htslib::bcf_update_info(
1070 self.header().inner,
1071 self.inner,
1072 tag_c_str.as_ptr() as *mut c_char,
1073 c_str.as_ptr() as *const ::std::os::raw::c_void,
1074 len as i32,
1075 ht as i32,
1076 ) == 0
1077 {
1078 Ok(())
1079 } else {
1080 Err(Error::BcfSetTag {
1081 tag: str::from_utf8(tag).unwrap().to_owned(),
1082 })
1083 }
1084 }
1085 }
1086
1087 /// Remove unused alleles.
1088 pub fn trim_alleles(&mut self) -> Result<()> {
1089 match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
1090 -1 => Err(Error::BcfRemoveAlleles),
1091 _ => Ok(()),
1092 }
1093 }
1094
1095 pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> {
1096 let rm_set = unsafe { htslib::kbs_init(remove.len()) };
1097
1098 for (i, &r) in remove.iter().enumerate() {
1099 if r {
1100 unsafe {
1101 htslib::kbs_insert(rm_set, i as i32);
1102 }
1103 }
1104 }
1105
1106 let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
1107
1108 unsafe {
1109 htslib::kbs_destroy(rm_set);
1110 }
1111
1112 match ret {
1113 -1 => Err(Error::BcfRemoveAlleles),
1114 _ => Ok(()),
1115 }
1116 }
1117
1118 /// Get the length of the reference allele. If the record has no reference allele, then the
1119 /// result will be `0`.
1120 ///
1121 /// # Example
1122 /// ```rust
1123 /// # use rust_htslib::bcf::{Format, Writer};
1124 /// # use rust_htslib::bcf::header::Header;
1125 /// #
1126 /// # // Create minimal VCF header with a single sample
1127 /// # let mut header = Header::new();
1128 /// # header.push_sample("sample".as_bytes());
1129 /// #
1130 /// # // Write uncompressed VCF to stdout with above header and get an empty record
1131 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1132 /// # let mut record = vcf.empty_record();
1133 /// # assert_eq!(record.rlen(), 0);
1134 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1135 /// record.set_alleles(alleles).expect("Failed to set alleles");
1136 /// assert_eq!(record.rlen(), 3)
1137 /// ```
1138 pub fn rlen(&self) -> i64 {
1139 self.inner().rlen
1140 }
1141
1142 /// Clear all parts of the record. Useful if you plan to reuse a record object multiple times.
1143 ///
1144 /// # Example
1145 /// ```rust
1146 /// # use rust_htslib::bcf::{Format, Writer};
1147 /// # use rust_htslib::bcf::header::Header;
1148 /// #
1149 /// # // Create minimal VCF header with a single sample
1150 /// # let mut header = Header::new();
1151 /// # header.push_sample("sample".as_bytes());
1152 /// #
1153 /// # // Write uncompressed VCF to stdout with above header and get an empty record
1154 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1155 /// # let mut record = vcf.empty_record();
1156 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1157 /// record.set_alleles(alleles).expect("Failed to set alleles");
1158 /// record.set_pos(6);
1159 /// record.clear();
1160 /// assert_eq!(record.rlen(), 0);
1161 /// assert_eq!(record.pos(), 0)
1162 /// ```
1163 pub fn clear(&self) {
1164 unsafe { htslib::bcf_clear(self.inner) }
1165 }
1166
1167 /// Provide short description of record for locating it in the BCF/VCF file.
1168 pub fn desc(&self) -> String {
1169 if let Some(rid) = self.rid() {
1170 if let Ok(contig) = self.header.rid2name(rid) {
1171 return format!("{}:{}", str::from_utf8(contig).unwrap(), self.pos());
1172 }
1173 }
1174 "".to_owned()
1175 }
1176
1177 /// Convert to VCF String
1178 ///
1179 /// Intended for debug only. Use Writer for efficient VCF output.
1180 ///
1181 pub fn to_vcf_string(&self) -> Result<String> {
1182 let mut buf = htslib::kstring_t {
1183 l: 0,
1184 m: 0,
1185 s: ptr::null_mut(),
1186 };
1187 let ret = unsafe { htslib::vcf_format(self.header().inner, self.inner, &mut buf) };
1188
1189 if ret < 0 {
1190 if !buf.s.is_null() {
1191 unsafe {
1192 libc::free(buf.s as *mut libc::c_void);
1193 }
1194 }
1195 return Err(Error::BcfToString);
1196 }
1197
1198 let vcf_str = unsafe {
1199 let vcf_str = String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap());
1200 if !buf.s.is_null() {
1201 libc::free(buf.s as *mut libc::c_void);
1202 }
1203 vcf_str
1204 };
1205
1206 Ok(vcf_str)
1207 }
1208}
1209
1210impl Clone for Record {
1211 fn clone(&self) -> Self {
1212 let inner = unsafe { htslib::bcf_dup(self.inner) };
1213 Record {
1214 inner,
1215 header: self.header.clone(),
1216 }
1217 }
1218}
1219
1220impl genome::AbstractLocus for Record {
1221 fn contig(&self) -> &str {
1222 str::from_utf8(
1223 self.header()
1224 .rid2name(self.rid().expect("rid not set"))
1225 .expect("unable to find rid in header"),
1226 )
1227 .expect("unable to interpret contig name as UTF-8")
1228 }
1229
1230 fn pos(&self) -> u64 {
1231 self.pos() as u64
1232 }
1233}
1234
/// Phased or unphased alleles, represented as indices.
///
/// The payload of `Unphased`/`Phased` is the index into the record's list of
/// alleles (see `index`); the two `*Missing` variants carry no index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
    /// Unphased allele with the given allele index.
    Unphased(i32),
    /// Phased allele with the given allele index.
    Phased(i32),
    /// Missing allele, unphased.
    UnphasedMissing,
    /// Missing allele, phased.
    PhasedMissing,
}
1243
1244impl GenotypeAllele {
1245 /// Decode given integer according to BCF standard.
1246 #[deprecated(
1247 since = "0.36.0",
1248 note = "Please use the conversion trait From<i32> for GenotypeAllele instead."
1249 )]
1250 pub fn from_encoded(encoded: i32) -> Self {
1251 match (encoded, encoded & 1) {
1252 (0, 0) => GenotypeAllele::UnphasedMissing,
1253 (1, 1) => GenotypeAllele::PhasedMissing,
1254 (e, 1) => GenotypeAllele::Phased((e >> 1) - 1),
1255 (e, 0) => GenotypeAllele::Unphased((e >> 1) - 1),
1256 _ => panic!("unexpected phasing type"),
1257 }
1258 }
1259
1260 /// Get the index into the list of alleles.
1261 pub fn index(self) -> Option<u32> {
1262 match self {
1263 GenotypeAllele::Unphased(i) | GenotypeAllele::Phased(i) => Some(i as u32),
1264 GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
1265 }
1266 }
1267}
1268
1269impl fmt::Display for GenotypeAllele {
1270 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1271 match self.index() {
1272 Some(a) => write!(f, "{}", a),
1273 None => write!(f, "."),
1274 }
1275 }
1276}
1277
1278impl From<GenotypeAllele> for i32 {
1279 fn from(allele: GenotypeAllele) -> i32 {
1280 let (allele, phased) = match allele {
1281 GenotypeAllele::UnphasedMissing => (-1, 0),
1282 GenotypeAllele::PhasedMissing => (-1, 1),
1283 GenotypeAllele::Unphased(a) => (a, 0),
1284 GenotypeAllele::Phased(a) => (a, 1),
1285 };
1286 ((allele + 1) << 1) | phased
1287 }
1288}
1289
1290impl From<i32> for GenotypeAllele {
1291 fn from(encoded: i32) -> GenotypeAllele {
1292 match (encoded, encoded & 1) {
1293 (0, 0) => GenotypeAllele::UnphasedMissing,
1294 (1, 1) => GenotypeAllele::PhasedMissing,
1295 (e, 1) => GenotypeAllele::Phased((e >> 1) - 1),
1296 (e, 0) => GenotypeAllele::Unphased((e >> 1) - 1),
1297 _ => panic!("unexpected phasing type"),
1298 }
1299 }
1300}
1301
// Newtype around the alleles of one sample's genotype.
// NOTE(review): `NewtypeDeref` presumably makes `Genotype` dereference to the
// inner `Vec<GenotypeAllele>` — confirm against the derive macro in use.
custom_derive! {
    /// Genotype representation as a vector of `GenotypeAllele`.
    #[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Genotype(Vec<GenotypeAllele>);
}
1307
1308impl fmt::Display for Genotype {
1309 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1310 let Genotype(alleles) = self;
1311 write!(f, "{}", alleles[0])?;
1312 for a in &alleles[1..] {
1313 let sep = match a {
1314 GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
1315 GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
1316 };
1317 write!(f, "{}{}", sep, a)?;
1318 }
1319 Ok(())
1320 }
1321}
1322
/// Lazy representation of genotypes, that does no computation until a particular genotype is queried.
///
/// Holds the still-encoded integer values, one slice per sample; decoding into
/// `GenotypeAllele`s happens in `get`.
#[derive(Debug)]
pub struct Genotypes<'a, B>
where
    B: Borrow<Buffer> + 'a,
{
    // Encoded alleles, indexed by sample, backed by the buffer `B`.
    encoded: BufferBacked<'a, Vec<&'a [i32]>, B>,
}
1331
1332impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1333 /// Get genotype of ith sample.
1334 ///
1335 /// Note that the result complies with the BCF spec. This means that the
1336 /// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
1337 /// this method will return `[Unphased(1), Phased(1)]`.
1338 pub fn get(&self, i: usize) -> Genotype {
1339 let igt = self.encoded[i];
1340 let allelles = igt
1341 .iter()
1342 .take_while(|&&i| i != VECTOR_END_INTEGER)
1343 .map(|&i| GenotypeAllele::from(i))
1344 .collect();
1345 Genotype(allelles)
1346 }
1347}
1348
1349impl Drop for Record {
1350 fn drop(&mut self) {
1351 unsafe { htslib::bcf_destroy(self.inner) };
1352 }
1353}
1354
// `Record` holds a raw htslib pointer (`inner`), so `Send` and `Sync` are not
// derived automatically and are asserted manually here.
// NOTE(review): the soundness argument is not visible in this file section —
// confirm, in particular because `clear` modifies the underlying record
// through `&self`.
unsafe impl Send for Record {}

unsafe impl Sync for Record {}
1358
/// Info tag representation.
///
/// Bundles the record to read from, the tag name, and the buffer that htslib
/// fills when the values are fetched.
#[derive(Debug)]
pub struct Info<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    // Record whose INFO column is queried.
    record: &'a Record,
    // Name of the INFO tag.
    tag: &'a [u8],
    // Buffer receiving the values returned by htslib.
    buffer: B,
}
1366
/// Optional list of byte strings whose memory is owned by the buffer `B`;
/// this is the payload type returned by `Info::string`.
pub type BufferBackedOption<'b, B> = Option<BufferBacked<'b, Vec<&'b [u8]>, B>>;
1368
1369impl<'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Info<'_, B> {
1370 /// Short description of info tag.
1371 pub fn desc(&self) -> String {
1372 str::from_utf8(self.tag).unwrap().to_owned()
1373 }
1374
1375 fn data(&mut self, data_type: u32) -> Result<Option<i32>> {
1376 let mut n: i32 = self.buffer.borrow().len;
1377 let c_str = ffi::CString::new(self.tag).unwrap();
1378 let ret = unsafe {
1379 htslib::bcf_get_info_values(
1380 self.record.header().inner,
1381 self.record.inner,
1382 c_str.as_ptr() as *mut c_char,
1383 &mut self.buffer.borrow_mut().inner,
1384 &mut n,
1385 data_type as i32,
1386 )
1387 };
1388 self.buffer.borrow_mut().len = n;
1389
1390 match ret {
1391 -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1392 -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1393 -3 => Ok(None),
1394 ret => Ok(Some(ret)),
1395 }
1396 }
1397
1398 /// Get integers from tag. `None` if tag not present in record.
1399 ///
1400 /// Import `bcf::record::Numeric` for missing value handling.
1401 ///
1402 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1403 /// as along as the data is accessed. If parts of the data are accessed while
1404 /// the BufferBacked object is already dropped, you will access unallocated
1405 /// memory.
1406 pub fn integer(mut self) -> Result<Option<BufferBacked<'b, &'b [i32], B>>> {
1407 self.data(htslib::BCF_HT_INT).map(|data| {
1408 data.map(|ret| {
1409 let values = unsafe {
1410 slice::from_raw_parts(self.buffer.borrow().inner as *const i32, ret as usize)
1411 };
1412 BufferBacked::new(&values[..ret as usize], self.buffer)
1413 })
1414 })
1415 }
1416
1417 /// Get floats from tag. `None` if tag not present in record.
1418 ///
1419 /// Import `bcf::record::Numeric` for missing value handling.
1420 ///
1421 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1422 /// as along as the data is accessed. If parts of the data are accessed while
1423 /// the BufferBacked object is already dropped, you will access unallocated
1424 /// memory.
1425 pub fn float(mut self) -> Result<Option<BufferBacked<'b, &'b [f32], B>>> {
1426 self.data(htslib::BCF_HT_REAL).map(|data| {
1427 data.map(|ret| {
1428 let values = unsafe {
1429 slice::from_raw_parts(self.buffer.borrow().inner as *const f32, ret as usize)
1430 };
1431 BufferBacked::new(&values[..ret as usize], self.buffer)
1432 })
1433 })
1434 }
1435
1436 /// Get flags from tag. `false` if not set.
1437 pub fn flag(&mut self) -> Result<bool> {
1438 self.data(htslib::BCF_HT_FLAG).map(|data| match data {
1439 Some(ret) => ret == 1,
1440 None => false,
1441 })
1442 }
1443
1444 /// Get strings from tag. `None` if tag not present in record.
1445 ///
1446 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1447 /// as along as the data is accessed. If parts of the data are accessed while
1448 /// the BufferBacked object is already dropped, you will access unallocated
1449 /// memory.
1450 pub fn string(mut self) -> Result<BufferBackedOption<'b, B>> {
1451 self.data(htslib::BCF_HT_STR).map(|data| {
1452 data.map(|ret| {
1453 BufferBacked::new(
1454 unsafe {
1455 slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize)
1456 }
1457 .split(|c| *c == b',')
1458 .map(|s| {
1459 // stop at zero character
1460 s.split(|c| *c == 0u8)
1461 .next()
1462 .expect("Bug: returned string should not be empty.")
1463 })
1464 .collect(),
1465 self.buffer,
1466 )
1467 })
1468 })
1469 }
1470}
1471
// Manual thread-safety assertions for `Info`, which wraps a `&Record`, the tag
// and a buffer of type `B`.
// NOTE(review): these impls place no `Send`/`Sync` bound on `B`; the soundness
// argument is not visible here — confirm.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Info<'_, B> {}

unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Info<'_, B> {}
1475
1476fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
1477 s.split(|v| v.is_vector_end())
1478 .next()
1479 .expect("Bug: returned slice should not be empty.")
1480}
1481
/// Representation of per-sample data.
///
/// Bundles the record to read from, the tag name, the htslib format entry
/// looked up for that tag, and the buffer that htslib fills when the values
/// are fetched.
#[derive(Debug)]
pub struct Format<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    // Record whose FORMAT column is queried.
    record: &'a Record,
    // Name of the FORMAT tag.
    tag: &'a [u8],
    // Pointer returned by `bcf_get_fmt` for this tag.
    inner: *mut htslib::bcf_fmt_t,
    // Buffer receiving the values returned by htslib.
    buffer: B,
}
1490
1491impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
1492 /// Create new format data in a given record.
1493 fn new(record: &'a Record, tag: &'a [u8], buffer: B) -> Format<'a, B> {
1494 let c_str = ffi::CString::new(tag).unwrap();
1495 let inner = unsafe {
1496 htslib::bcf_get_fmt(
1497 record.header().inner,
1498 record.inner,
1499 c_str.as_ptr() as *mut c_char,
1500 )
1501 };
1502 Format {
1503 record,
1504 tag,
1505 inner,
1506 buffer,
1507 }
1508 }
1509
1510 /// Provide short description of format entry (just the tag name).
1511 pub fn desc(&self) -> String {
1512 str::from_utf8(self.tag).unwrap().to_owned()
1513 }
1514
1515 pub fn inner(&self) -> &htslib::bcf_fmt_t {
1516 unsafe { &*self.inner }
1517 }
1518
1519 pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
1520 unsafe { &mut *self.inner }
1521 }
1522
1523 fn values_per_sample(&self) -> usize {
1524 self.inner().n as usize
1525 }
1526
1527 /// Read and decode format data into a given type.
1528 fn data(&mut self, data_type: u32) -> Result<i32> {
1529 let mut n: i32 = self.buffer.borrow().len;
1530 let c_str = ffi::CString::new(self.tag).unwrap();
1531 let ret = unsafe {
1532 htslib::bcf_get_format_values(
1533 self.record.header().inner,
1534 self.record.inner,
1535 c_str.as_ptr() as *mut c_char,
1536 &mut self.buffer.borrow_mut().inner,
1537 &mut n,
1538 data_type as i32,
1539 )
1540 };
1541 self.buffer.borrow_mut().len = n;
1542 match ret {
1543 -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1544 -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1545 -3 => Err(Error::BcfMissingTag {
1546 tag: self.desc(),
1547 record: self.record.desc(),
1548 }),
1549 ret => Ok(ret),
1550 }
1551 }
1552
1553 /// Get format data as integers.
1554 ///
1555 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1556 /// as long as the data is accessed. If parts of the data are accessed while
1557 /// the BufferBacked object is already dropped, you will access unallocated
1558 /// memory.
1559 pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
1560 self.data(htslib::BCF_HT_INT).map(|ret| {
1561 BufferBacked::new(
1562 unsafe {
1563 slice::from_raw_parts(
1564 self.buffer.borrow_mut().inner as *const i32,
1565 ret as usize,
1566 )
1567 }
1568 .chunks(self.values_per_sample())
1569 .map(trim_slice)
1570 .collect(),
1571 self.buffer,
1572 )
1573 })
1574 }
1575
1576 /// Get format data as floats.
1577 ///
1578 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1579 /// as along as the data is accessed. If parts of the data are accessed while
1580 /// the BufferBacked object is already dropped, you will access unallocated
1581 /// memory.
1582 pub fn float(mut self) -> Result<BufferBacked<'b, Vec<&'b [f32]>, B>> {
1583 self.data(htslib::BCF_HT_REAL).map(|ret| {
1584 BufferBacked::new(
1585 unsafe {
1586 slice::from_raw_parts(
1587 self.buffer.borrow_mut().inner as *const f32,
1588 ret as usize,
1589 )
1590 }
1591 .chunks(self.values_per_sample())
1592 .map(trim_slice)
1593 .collect(),
1594 self.buffer,
1595 )
1596 })
1597 }
1598
1599 /// Get format data as byte slices. To obtain the values strings, use `std::str::from_utf8`.
1600 ///
1601 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1602 /// as along as the data is accessed. If parts of the data are accessed while
1603 /// the BufferBacked object is already dropped, you will access unallocated
1604 /// memory.
1605 pub fn string(mut self) -> Result<BufferBacked<'b, Vec<&'b [u8]>, B>> {
1606 self.data(htslib::BCF_HT_STR).map(|ret| {
1607 if ret == 0 {
1608 return BufferBacked::new(Vec::new(), self.buffer);
1609 }
1610 BufferBacked::new(
1611 unsafe {
1612 slice::from_raw_parts(self.buffer.borrow_mut().inner as *const u8, ret as usize)
1613 }
1614 .chunks(self.values_per_sample())
1615 .map(|s| {
1616 // stop at zero character
1617 s.split(|c| *c == 0u8)
1618 .next()
1619 .expect("Bug: returned string should not be empty.")
1620 })
1621 .collect(),
1622 self.buffer,
1623 )
1624 })
1625 }
1626}
1627
// `Format` stores a raw `*mut htslib::bcf_fmt_t`, so `Send` and `Sync` are not
// derived automatically and are asserted manually here.
// NOTE(review): these impls place no `Send`/`Sync` bound on `B`; the soundness
// argument is not visible here — confirm.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Format<'_, B> {}

unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Format<'_, B> {}
1631
/// Iterator over the filter ids stored in a record.
#[derive(Debug)]
pub struct Filters<'a> {
    /// Reference to the `Record` whose filters are enumerated.
    record: &'a Record,
    /// Index of the next filter to return, if not at end.
    idx: i32,
}
1639
1640impl<'a> Filters<'a> {
1641 pub fn new(record: &'a Record) -> Self {
1642 Filters { record, idx: 0 }
1643 }
1644}
1645
1646impl Iterator for Filters<'_> {
1647 type Item = Id;
1648
1649 fn next(&mut self) -> Option<Id> {
1650 if self.record.inner().d.n_flt <= self.idx {
1651 None
1652 } else {
1653 let i = self.idx as isize;
1654 self.idx += 1;
1655 Some(Id(unsafe { *self.record.inner().d.flt.offset(i) } as u32))
1656 }
1657 }
1658}
1659
// Unit tests for `Record` and the sentinel constants. Each record test builds
// a VCF `Writer` on a temporary file and works on an empty record from it.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bcf::{Format, Header, Writer};
    use tempfile::NamedTempFile;

    // The missing-value float must keep its exact bit pattern.
    #[test]
    fn test_missing_float() {
        let expected: u32 = 0x7F80_0001;
        assert_eq!(MISSING_FLOAT.bits(), expected);
    }

    // The vector-end float must keep its exact bit pattern.
    #[test]
    fn test_vector_end_float() {
        let expected: u32 = 0x7F80_0002;
        assert_eq!(VECTOR_END_FLOAT.bits(), expected);
    }

    // `rlen` is 0 for an empty record and the reference length after
    // alleles have been set.
    #[test]
    fn test_record_rlen() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert_eq!(record.rlen(), 0);
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        assert_eq!(record.rlen(), 3)
    }

    // `end` is the position plus the reference length.
    #[test]
    fn test_record_end() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(5);

        assert_eq!(record.end(), 8)
    }

    // `clear` resets reference length, sample count and position.
    #[test]
    fn test_record_clear() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_sample("sample".as_bytes());
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(6);
        record.clear();

        assert_eq!(record.rlen(), 0);
        assert_eq!(record.sample_count(), 0);
        assert_eq!(record.pos(), 0)
    }

    // A clone is independent of the original: changing its position leaves
    // the original untouched, while the alleles are carried over.
    #[test]
    fn test_record_clone() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(6);

        let mut cloned_record = record.clone();
        cloned_record.set_pos(5);

        assert_eq!(record.pos(), 6);
        assert_eq!(record.allele_count(), 2);
        assert_eq!(cloned_record.pos(), 5);
        assert_eq!(cloned_record.allele_count(), 2);
    }

    // An empty record reports PASS (by name, as "." and as `Id(0)`) and no
    // other filter.
    #[test]
    fn test_record_has_filter_pass_is_default() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let record = vcf.empty_record();

        assert!(record.has_filter("PASS".as_bytes()));
        assert!(record.has_filter(".".as_bytes()));
        assert!(record.has_filter(&Id(0)));
        assert!(!record.has_filter("foo".as_bytes()));
        assert!(!record.has_filter(&Id(2)));
    }

    // After pushing a custom filter the record has it and no longer has PASS.
    #[test]
    fn test_record_has_filter_custom() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        record.push_filter("foo".as_bytes()).unwrap();

        assert!(record.has_filter("foo".as_bytes()));
        assert!(!record.has_filter("PASS".as_bytes()))
    }

    // Filters can be pushed by name or by id; pushing a filter that is not
    // declared in the header is an error.
    #[test]
    fn test_record_push_filter() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert!(record.has_filter("PASS".as_bytes()));
        record.push_filter("foo".as_bytes()).unwrap();
        let bar = record.header().name_to_id(b"bar").unwrap();
        record.push_filter(&bar).unwrap();
        assert!(record.has_filter("foo".as_bytes()));
        assert!(record.has_filter(&bar));
        assert!(!record.has_filter("PASS".as_bytes()));
        assert!(record.push_filter("baz".as_bytes()).is_err())
    }

    // `set_filters` replaces the whole filter list; an empty list restores
    // PASS, and an undeclared filter name is an error.
    #[test]
    fn test_record_set_filters() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert!(record.has_filter("PASS".as_bytes()));
        record
            .set_filters(&["foo".as_bytes(), "bar".as_bytes()])
            .unwrap();
        assert!(record.has_filter("foo".as_bytes()));
        assert!(record.has_filter("bar".as_bytes()));
        assert!(!record.has_filter("PASS".as_bytes()));
        let filters: &[&Id] = &[];
        record.set_filters(filters).unwrap();
        assert!(record.has_filter("PASS".as_bytes()));
        assert!(!record.has_filter("foo".as_bytes()));
        assert!(record
            .set_filters(&["foo".as_bytes(), "baz".as_bytes()])
            .is_err())
    }

    // Removing filters one by one; once the last one is gone (with the
    // second argument set to `true`) the record reports PASS again.
    #[test]
    fn test_record_remove_filter() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let foo = record.header().name_to_id(b"foo").unwrap();
        let bar = record.header().name_to_id(b"bar").unwrap();
        record.set_filters(&[&foo, &bar]).unwrap();
        assert!(record.has_filter(&foo));
        assert!(record.has_filter(&bar));
        record.remove_filter(&foo, true).unwrap();
        assert!(!record.has_filter(&foo));
        assert!(record.has_filter(&bar));
        assert!(record.remove_filter("baz".as_bytes(), true).is_err());
        record.remove_filter(&bar, true).unwrap();
        assert!(!record.has_filter(&bar));
        assert!(record.has_filter("PASS".as_bytes()));
    }

    // Formatting an empty record against a header without contigs fails.
    #[test]
    fn test_record_to_vcf_string_err() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let record = vcf.empty_record();
        assert!(record.to_vcf_string().is_err());
    }

    // With a contig declared, the record formats to a full VCF line.
    #[test]
    fn test_record_to_vcf_string() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(b"##contig=<ID=chr1,length=1000>");
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        record.push_filter("foo".as_bytes()).unwrap();
        assert_eq!(
            record.to_vcf_string().unwrap(),
            "chr1\t1\t.\t.\t.\t0\tfoo\t.\n"
        );
    }
}
1864}