Skip to main content

rust_htslib/bcf/
record.rs

1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::{Borrow, BorrowMut};
7use std::fmt;
8use std::marker::PhantomData;
9use std::ops::Deref;
10use std::os::raw::c_char;
11use std::ptr;
12use std::slice;
13use std::str;
14use std::sync::Arc;
15use std::{ffi, iter};
16
17use bio_types::genome;
18use derive_new::new;
19use ieee754::Ieee754;
20use lazy_static::lazy_static;
21
22use crate::bcf::header::{HeaderView, Id};
23use crate::bcf::Error;
24use crate::errors::Result;
25use crate::htslib;
26
27const MISSING_INTEGER: i32 = i32::MIN;
28const VECTOR_END_INTEGER: i32 = i32::MIN + 1;
29
30lazy_static! {
31    static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F80_0001);
32    static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F80_0002);
33}
34
/// Behaviour shared by the numeric value types of INFO and FORMAT entries.
pub trait Numeric {
    /// Return the value that represents "missing" when stored in a BCF record.
    fn missing() -> Self;

    /// Return `true` if this entry is the missing-value marker.
    fn is_missing(&self) -> bool;
}
43
44impl Numeric for f32 {
45    fn is_missing(&self) -> bool {
46        self.bits() == MISSING_FLOAT.bits()
47    }
48
49    fn missing() -> f32 {
50        *MISSING_FLOAT
51    }
52}
53
54impl Numeric for i32 {
55    fn is_missing(&self) -> bool {
56        *self == MISSING_INTEGER
57    }
58
59    fn missing() -> i32 {
60        MISSING_INTEGER
61    }
62}
63
/// Module-private helper for recognising the vector-end marker in numeric data.
trait NumericUtils {
    /// Return `true` if this entry is the vector-end marker, i.e. no further
    /// values follow it.
    fn is_vector_end(&self) -> bool;
}
68
69impl NumericUtils for f32 {
70    fn is_vector_end(&self) -> bool {
71        self.bits() == VECTOR_END_FLOAT.bits()
72    }
73}
74
75impl NumericUtils for i32 {
76    fn is_vector_end(&self) -> bool {
77        *self == VECTOR_END_INTEGER
78    }
79}
80
81/// A trait to allow for seamless use of bytes or integer identifiers for filters
82pub trait FilterId {
83    fn id_from_header(&self, header: &HeaderView) -> Result<Id>;
84    fn is_pass(&self) -> bool;
85}
86
87impl FilterId for [u8] {
88    fn id_from_header(&self, header: &HeaderView) -> Result<Id> {
89        header.name_to_id(self)
90    }
91    fn is_pass(&self) -> bool {
92        matches!(self, b"PASS" | b".")
93    }
94}
95
96impl FilterId for Id {
97    fn id_from_header(&self, _header: &HeaderView) -> Result<Id> {
98        Ok(*self)
99    }
100    fn is_pass(&self) -> bool {
101        *self == Id(0)
102    }
103}
104
/// A buffer for info or format data.
///
/// Starts out empty (see [`Buffer::new`]); the pointer it holds is released with
/// `free` when the buffer is dropped.
#[derive(Debug)]
pub struct Buffer {
    /// Raw allocation, null while the buffer is empty.
    inner: *mut ::std::ffi::c_void,
    /// Size recorded for `inner`; the unit is not visible in this part of the
    /// file — presumably whatever the filling routine reports.
    len: i32,
}
111
112impl Buffer {
113    pub fn new() -> Self {
114        Buffer {
115            inner: ptr::null_mut(),
116            len: 0,
117        }
118    }
119}
120
121impl Default for Buffer {
122    fn default() -> Self {
123        Self::new()
124    }
125}
126
127impl Drop for Buffer {
128    fn drop(&mut self) {
129        unsafe {
130            ::libc::free(self.inner);
131        }
132    }
133}
134
135#[derive(new, Debug)]
136pub struct BufferBacked<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> {
137    value: T,
138    _buffer: B,
139    #[new(default)]
140    phantom: PhantomData<&'a B>,
141}
142
143impl<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> Deref for BufferBacked<'a, T, B> {
144    type Target = T;
145
146    fn deref(&self) -> &T {
147        &self.value
148    }
149}
150
151impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow<Buffer> + 'a> fmt::Display
152    for BufferBacked<'a, T, B>
153{
154    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155        fmt::Display::fmt(&self.value, f)
156    }
157}
158
159/// A VCF/BCF record.
160/// New records can be created by the `empty_record` methods of [`bcf::Reader`](crate::bcf::Reader)
161/// and [`bcf::Writer`](crate::bcf::Writer).
162/// # Example
163/// ```rust
164/// use rust_htslib::bcf::{Format, Writer};
165/// use rust_htslib::bcf::header::Header;
166///
167/// // Create minimal VCF header with a single sample
168/// let mut header = Header::new();
169/// header.push_sample("sample".as_bytes());
170///
171/// // Write uncompressed VCF to stdout with above header and get an empty record
172/// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
173/// let mut record = vcf.empty_record();
174/// ```
175#[derive(Debug)]
176pub struct Record {
177    pub inner: *mut htslib::bcf1_t,
178    header: Arc<HeaderView>,
179}
180
181impl Record {
182    /// Construct record with reference to header `HeaderView`, for create-internal use.
183    pub fn new(header: Arc<HeaderView>) -> Self {
184        let inner = unsafe {
185            let inner = htslib::bcf_init();
186            // Always unpack record.
187            htslib::bcf_unpack(inner, htslib::BCF_UN_ALL as i32);
188            inner
189        };
190        Record { inner, header }
191    }
192
193    /// Force unpacking of internal record values.
194    pub fn unpack(&mut self) {
195        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
196    }
197
198    /// Return associated header.
199    pub fn header(&self) -> &HeaderView {
200        self.header.as_ref()
201    }
202
203    /// Translate the record to the given header.
204    pub fn translate(&mut self, dst_header: &mut Arc<HeaderView>) -> Result<()> {
205        if unsafe { htslib::bcf_translate(dst_header.inner, self.header().inner, self.inner) } == 0
206        {
207            self.set_header(Arc::clone(dst_header));
208            Ok(())
209        } else {
210            Err(Error::BcfTranslate)
211        }
212    }
213
214    /// Set the record header.
215    pub(crate) fn set_header(&mut self, header: Arc<HeaderView>) {
216        self.header = header;
217    }
218
219    /// Return reference to the inner C struct.
220    ///
221    /// # Remarks
222    ///
223    /// Note that this function is only required as long as Rust-Htslib does not provide full
224    /// access to all aspects of Htslib.
225    pub fn inner(&self) -> &htslib::bcf1_t {
226        unsafe { &*self.inner }
227    }
228
229    /// Return mutable reference to inner C struct.
230    ///
231    /// # Remarks
232    ///
233    /// Note that this function is only required as long as Rust-Htslib does not provide full
234    /// access to all aspects of Htslib.
235    pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
236        unsafe { &mut *self.inner }
237    }
238
239    /// Get the reference id of the record.
240    ///
241    /// To look up the contig name,
242    /// use [`HeaderView::rid2name`](../header/struct.HeaderView.html#method.rid2name).
243    ///
244    /// # Returns
245    ///
246    /// - `Some(rid)` if the internal `rid` is set to a value that is not `-1`
247    /// - `None` if the internal `rid` is set to `-1`
248    pub fn rid(&self) -> Option<u32> {
249        match self.inner().rid {
250            -1 => None,
251            rid => Some(rid as u32),
252        }
253    }
254
255    /// Update the reference id of the record.
256    ///
257    /// To look up reference id for a contig name,
258    /// use [`HeaderView::name2rid`](../header/struct.HeaderView.html#method.name2rid).
259    ///
260    /// # Example
261    ///
262    /// Example assumes we have a Record `record` from a VCF with a header containing region
263    /// named `1`. See [module documentation](../index.html#example-writing) for how to set
264    /// up VCF, header, and record.
265    ///
266    /// ```
267    /// # use rust_htslib::bcf::{Format, Writer};
268    /// # use rust_htslib::bcf::header::Header;
269    /// # let mut header = Header::new();
270    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
271    /// # header.push_record(header_contig_line.as_bytes());
272    /// # header.push_sample("test_sample".as_bytes());
273    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
274    /// # let mut record = vcf.empty_record();
275    /// let rid = record.header().name2rid(b"1").ok();
276    /// record.set_rid(rid);
277    /// assert_eq!(record.rid(), rid);
278    /// let name = record.header().rid2name(record.rid().unwrap()).ok();
279    /// assert_eq!(Some("1".as_bytes()), name);
280    /// ```
281    pub fn set_rid(&mut self, rid: Option<u32>) {
282        match rid {
283            Some(rid) => self.inner_mut().rid = rid as i32,
284            None => self.inner_mut().rid = -1,
285        }
286    }
287
288    /// Return **0-based** position
289    pub fn pos(&self) -> i64 {
290        self.inner().pos
291    }
292
293    /// Set **0-based** position
294    pub fn set_pos(&mut self, pos: i64) {
295        self.inner_mut().pos = pos;
296    }
297
298    /// Return the **0-based, exclusive** end position
299    ///
300    /// # Example
301    /// ```rust
302    /// # use rust_htslib::bcf::{Format, Header, Writer};
303    /// # use tempfile::NamedTempFile;
304    /// # let tmp = NamedTempFile::new().unwrap();
305    /// # let path = tmp.path();
306    /// # let header = Header::new();
307    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
308    /// # let mut record = vcf.empty_record();
309    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
310    /// record.set_alleles(alleles).expect("Failed to set alleles");
311    /// record.set_pos(5);
312    ///
313    /// assert_eq!(record.end(), 8)
314    /// ```
315    pub fn end(&self) -> i64 {
316        self.pos() + self.rlen()
317    }
318
319    /// Return the value of the ID column.
320    ///
321    /// When empty, returns `b".".to_vec()`.
322    pub fn id(&self) -> Vec<u8> {
323        if self.inner().d.id.is_null() {
324            b".".to_vec()
325        } else {
326            let id = unsafe { ffi::CStr::from_ptr(self.inner().d.id) };
327            id.to_bytes().to_vec()
328        }
329    }
330
331    /// Update the ID string to the given value.
332    pub fn set_id(&mut self, id: &[u8]) -> Result<()> {
333        let c_str = ffi::CString::new(id).unwrap();
334        if unsafe {
335            htslib::bcf_update_id(
336                self.header().inner,
337                self.inner,
338                c_str.as_ptr() as *mut c_char,
339            )
340        } == 0
341        {
342            Ok(())
343        } else {
344            Err(Error::BcfSetValues)
345        }
346    }
347
348    /// Clear the ID column (set it to `"."`).
349    pub fn clear_id(&mut self) -> Result<()> {
350        let c_str = ffi::CString::new(&b"."[..]).unwrap();
351        if unsafe {
352            htslib::bcf_update_id(
353                self.header().inner,
354                self.inner,
355                c_str.as_ptr() as *mut c_char,
356            )
357        } == 0
358        {
359            Ok(())
360        } else {
361            Err(Error::BcfSetValues)
362        }
363    }
364
365    /// Add the ID string (the ID field is semicolon-separated), checking for duplicates.
366    pub fn push_id(&mut self, id: &[u8]) -> Result<()> {
367        let c_str = ffi::CString::new(id).unwrap();
368        if unsafe {
369            htslib::bcf_add_id(
370                self.header().inner,
371                self.inner,
372                c_str.as_ptr() as *mut c_char,
373            )
374        } == 0
375        {
376            Ok(())
377        } else {
378            Err(Error::BcfSetValues)
379        }
380    }
381
382    /// Return `Filters` iterator for enumerating all filters that have been set.
383    ///
384    /// A record having the `PASS` filter will return an empty `Filter` here.
385    pub fn filters(&self) -> Filters<'_> {
386        Filters::new(self)
387    }
388
389    /// Query whether the filter with the given ID has been set.
390    ///
391    /// This method can be used to check if a record passes filtering by using either `Id(0)`,
392    /// `PASS` or `.`
393    ///
394    /// # Example
395    /// ```rust
396    /// # use rust_htslib::bcf::{Format, Header, Writer};
397    /// # use rust_htslib::bcf::header::Id;
398    /// # use tempfile::NamedTempFile;
399    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
400    /// # let path = tmp.path();
401    /// let mut header = Header::new();
402    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
403    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
404    /// # let mut record = vcf.empty_record();
405    /// assert!(record.has_filter("PASS".as_bytes()));
406    /// assert!(record.has_filter(".".as_bytes()));
407    /// assert!(record.has_filter(&Id(0)));
408    ///
409    /// record.push_filter("foo".as_bytes()).unwrap();
410    /// assert!(record.has_filter("foo".as_bytes()));
411    /// assert!(!record.has_filter("PASS".as_bytes()))
412    /// ```
413    pub fn has_filter<T: FilterId + ?Sized>(&self, flt_id: &T) -> bool {
414        if flt_id.is_pass() && self.inner().d.n_flt == 0 {
415            return true;
416        }
417        let id = match flt_id.id_from_header(self.header()) {
418            Ok(i) => *i,
419            Err(_) => return false,
420        };
421        for i in 0..(self.inner().d.n_flt as isize) {
422            if unsafe { *self.inner().d.flt.offset(i) } == id as i32 {
423                return true;
424            }
425        }
426        false
427    }
428
429    /// Set the given filter IDs to the FILTER column.
430    ///
431    /// Setting an empty slice removes all filters and sets `PASS`.
432    ///
433    /// # Example
434    /// ```rust
435    /// # use rust_htslib::bcf::{Format, Header, Writer};
436    /// # use rust_htslib::bcf::header::Id;
437    /// # use tempfile::NamedTempFile;
438    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
439    /// # let path = tmp.path();
440    /// let mut header = Header::new();
441    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
442    /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
443    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
444    /// # let mut record = vcf.empty_record();
445    /// let foo = record.header().name_to_id(b"foo").unwrap();
446    /// let bar = record.header().name_to_id(b"bar").unwrap();
447    /// assert!(record.has_filter("PASS".as_bytes()));
448    /// let mut filters = vec![&foo, &bar];
449    /// record.set_filters(&filters).unwrap();
450    /// assert!(record.has_filter(&foo));
451    /// assert!(record.has_filter(&bar));
452    /// assert!(!record.has_filter("PASS".as_bytes()));
453    /// filters.clear();
454    /// record.set_filters(&filters).unwrap();
455    /// assert!(record.has_filter("PASS".as_bytes()));
456    /// assert!(!record.has_filter("foo".as_bytes()));
457    /// // 'baz' isn't in the header
458    /// assert!(record.set_filters(&["baz".as_bytes()]).is_err())
459    /// ```
460    ///
461    /// # Errors
462    /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
463    ///
464    pub fn set_filters<T: FilterId + ?Sized>(&mut self, flt_ids: &[&T]) -> Result<()> {
465        let mut ids: Vec<i32> = flt_ids
466            .iter()
467            .map(|id| id.id_from_header(self.header()).map(|id| *id as i32))
468            .collect::<Result<Vec<i32>>>()?;
469        unsafe {
470            htslib::bcf_update_filter(
471                self.header().inner,
472                self.inner,
473                ids.as_mut_ptr(),
474                ids.len() as i32,
475            );
476        };
477        Ok(())
478    }
479
480    /// Add the given filter to the FILTER column.
481    ///
482    /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
483    /// any existing `PASS` filter is removed.
484    ///
485    /// # Example
486    /// ```rust
487    /// # use rust_htslib::bcf::{Format, Header, Writer};
488    /// # use tempfile::NamedTempFile;
489    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
490    /// # let path = tmp.path();
491    /// let mut header = Header::new();
492    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
493    /// header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
494    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
495    /// # let mut record = vcf.empty_record();
496    /// let foo = "foo".as_bytes();
497    /// let bar = record.header().name_to_id(b"bar").unwrap();
498    /// assert!(record.has_filter("PASS".as_bytes()));
499    ///
500    /// record.push_filter(foo).unwrap();
501    /// record.push_filter(&bar).unwrap();
502    /// assert!(record.has_filter(foo));
503    /// assert!(record.has_filter(&bar));
504    /// // filter must exist in the header
505    /// assert!(record.push_filter("baz".as_bytes()).is_err())
506    /// ```
507    ///
508    /// # Errors
509    /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
510    ///
511    pub fn push_filter<T: FilterId + ?Sized>(&mut self, flt_id: &T) -> Result<()> {
512        let id = flt_id.id_from_header(self.header())?;
513        unsafe {
514            htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
515        };
516        Ok(())
517    }
518
519    /// Remove the given filter from the FILTER column.
520    ///
521    /// # Arguments
522    ///
523    /// - `flt_id` - The corresponding filter ID to remove.
524    /// - `pass_on_empty` - Set to `PASS` when removing the last filter.
525    ///
526    /// # Example
527    /// ```rust
528    /// # use rust_htslib::bcf::{Format, Header, Writer};
529    /// # use tempfile::NamedTempFile;
530    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
531    /// # let path = tmp.path();
532    /// let mut header = Header::new();
533    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
534    /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
535    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
536    /// # let mut record = vcf.empty_record();
537    /// let foo = "foo".as_bytes();
538    /// let bar = "bar".as_bytes();
539    /// record.set_filters(&[foo, bar]).unwrap();
540    /// assert!(record.has_filter(foo));
541    /// assert!(record.has_filter(bar));
542    ///
543    /// record.remove_filter(foo, true).unwrap();
544    /// assert!(!record.has_filter(foo));
545    /// assert!(record.has_filter(bar));
546    /// // 'baz' is not in the header
547    /// assert!(record.remove_filter("baz".as_bytes(), true).is_err());
548    ///
549    /// record.remove_filter(bar, true).unwrap();
550    /// assert!(!record.has_filter(bar));
551    /// assert!(record.has_filter("PASS".as_bytes()));
552    /// ```
553    ///
554    /// # Errors
555    /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
556    ///
557    pub fn remove_filter<T: FilterId + ?Sized>(
558        &mut self,
559        flt_id: &T,
560        pass_on_empty: bool,
561    ) -> Result<()> {
562        let id = flt_id.id_from_header(self.header())?;
563        unsafe {
564            htslib::bcf_remove_filter(
565                self.header().inner,
566                self.inner,
567                *id as i32,
568                pass_on_empty as i32,
569            )
570        };
571        Ok(())
572    }
573
574    /// Get alleles strings.
575    ///
576    /// The first allele is the reference allele.
577    pub fn alleles(&self) -> Vec<&[u8]> {
578        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
579        let n = self.inner().n_allele() as usize;
580        let dec = self.inner().d;
581        let alleles = unsafe { slice::from_raw_parts(dec.allele, n) };
582        (0..n)
583            .map(|i| unsafe { ffi::CStr::from_ptr(alleles[i]).to_bytes() })
584            .collect()
585    }
586
587    /// Set alleles. The first allele is the reference allele.
588    ///
589    /// # Example
590    /// ```rust
591    /// # use rust_htslib::bcf::{Format, Writer};
592    /// # use rust_htslib::bcf::header::Header;
593    /// #
594    /// # // Create minimal VCF header with a single sample
595    /// # let mut header = Header::new();
596    /// # header.push_sample("sample".as_bytes());
597    /// #
598    /// # // Write uncompressed VCF to stdout with above header and get an empty record
599    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
600    /// # let mut record = vcf.empty_record();
601    /// assert_eq!(record.allele_count(), 0);
602    ///
603    /// let alleles: &[&[u8]] = &[b"A", b"TG"];
604    /// record.set_alleles(alleles).expect("Failed to set alleles");
605    /// assert_eq!(record.allele_count(), 2)
606    /// ```
607    pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<()> {
608        let cstrings: Vec<ffi::CString> = alleles
609            .iter()
610            .map(|vec| ffi::CString::new(*vec).unwrap())
611            .collect();
612        let mut ptrs: Vec<*const c_char> = cstrings
613            .iter()
614            .map(|cstr| cstr.as_ptr() as *const c_char)
615            .collect();
616        if unsafe {
617            htslib::bcf_update_alleles(
618                self.header().inner,
619                self.inner,
620                ptrs.as_mut_ptr(),
621                alleles.len() as i32,
622            )
623        } == 0
624        {
625            Ok(())
626        } else {
627            Err(Error::BcfSetValues)
628        }
629    }
630
631    /// Get variant quality.
632    pub fn qual(&self) -> f32 {
633        self.inner().qual
634    }
635
636    /// Set variant quality.
637    pub fn set_qual(&mut self, qual: f32) {
638        self.inner_mut().qual = qual;
639    }
640
641    pub fn info<'a>(&'a self, tag: &'a [u8]) -> Info<'a, Buffer> {
642        self.info_shared_buffer(tag, Buffer::new())
643    }
644
645    /// Get the value of the given info tag.
646    pub fn info_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
647        &'a self,
648        tag: &'a [u8],
649        buffer: B,
650    ) -> Info<'a, B> {
651        Info {
652            record: self,
653            tag,
654            buffer,
655        }
656    }
657
658    /// Get the number of samples in the record.
659    pub fn sample_count(&self) -> u32 {
660        self.inner().n_sample()
661    }
662
663    /// Get the number of alleles, including reference allele.
664    pub fn allele_count(&self) -> u32 {
665        self.inner().n_allele()
666    }
667
668    /// Add/replace genotypes in FORMAT GT tag.
669    ///
670    /// # Arguments
671    ///
672    /// - `genotypes` - a flattened, two-dimensional array of GenotypeAllele,
673    ///   the first dimension contains one array for each sample.
674    ///
675    /// # Errors
676    ///
677    /// Returns error if GT tag is not present in header.
678    ///
679    /// # Example
680    ///
681    /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag.
682    /// See [module documentation](../index.html#example-writing) for how to set up
683    /// VCF, header, and record.
684    ///
685    /// ```
686    /// # use rust_htslib::bcf::{Format, Writer};
687    /// # use rust_htslib::bcf::header::Header;
688    /// # use rust_htslib::bcf::record::GenotypeAllele;
689    /// # let mut header = Header::new();
690    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
691    /// # header.push_record(header_contig_line.as_bytes());
692    /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
693    /// # header.push_record(header_gt_line.as_bytes());
694    /// # header.push_sample("test_sample".as_bytes());
695    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
696    /// # let mut record = vcf.empty_record();
697    /// let alleles = &[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)];
698    /// record.push_genotypes(alleles);
699    /// assert_eq!("1/1", &format!("{}", record.genotypes().unwrap().get(0)));
700    /// ```
701    pub fn push_genotypes(&mut self, genotypes: &[GenotypeAllele]) -> Result<()> {
702        let encoded: Vec<i32> = genotypes.iter().map(|gt| i32::from(*gt)).collect();
703        self.push_format_integer(b"GT", &encoded)
704    }
705
706    /// Add/replace genotypes in FORMAT GT tag by providing a list of genotypes.
707    ///
708    /// # Arguments
709    ///
710    /// - `genotypes` - a two-dimensional array of GenotypeAllele
711    /// - `max_ploidy` - the maximum number of alleles allowed for any genotype on any sample.
712    ///
713    /// # Errors
714    ///
    /// Returns an error if any genotype has more alleles than `max_ploidy` or if the GT tag is not present in the header.
716    ///
717    /// # Example
718    ///
719    /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag and three samples.
720    /// See [module documentation](../index.html#example-writing) for how to set up
721    /// VCF, header, and record.
722    ///
723    /// ```
724    /// # use rust_htslib::bcf::{Format, Writer};
725    /// # use rust_htslib::bcf::header::Header;
726    /// # use rust_htslib::bcf::record::GenotypeAllele;
727    /// # use std::iter;
728    /// # let mut header = Header::new();
729    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
730    /// # header.push_record(header_contig_line.as_bytes());
731    /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
732    /// # header.push_record(header_gt_line.as_bytes());
733    /// # header.push_sample("first_sample".as_bytes());
734    /// # header.push_sample("second_sample".as_bytes());
735    /// # header.push_sample("third_sample".as_bytes());
736    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf)?;
737    /// # let mut record = vcf.empty_record();
738    /// let alleles = vec![
739    ///     vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
740    ///     vec![GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)],
741    ///     vec![GenotypeAllele::Unphased(0)],
742    /// ];
743    /// record.push_genotype_structured(&alleles, 2);
744    /// let gts = record.genotypes()?;
745    /// assert_eq!("1/1", &format!("{}", gts.get(0)));
746    /// assert_eq!("0|1", &format!("{}", gts.get(1)));
747    /// assert_eq!("0", &format!("{}", gts.get(2)));
748    /// # Ok::<(), rust_htslib::errors::Error>(())
749    /// ```
750    pub fn push_genotype_structured<GT>(
751        &mut self,
752        genotypes: &[GT],
753        max_ploidy: usize,
754    ) -> Result<()>
755    where
756        GT: AsRef<[GenotypeAllele]>,
757    {
758        let mut data = Vec::with_capacity(max_ploidy * genotypes.len());
759        for gt in genotypes {
760            if gt.as_ref().len() > max_ploidy {
761                return Err(Error::BcfSetValues);
762            }
763            data.extend(
764                gt.as_ref()
765                    .iter()
766                    .map(|gta| i32::from(*gta))
767                    .chain(iter::repeat_n(
768                        VECTOR_END_INTEGER,
769                        max_ploidy - gt.as_ref().len(),
770                    )),
771            );
772        }
773        self.push_format_integer(b"GT", &data)
774    }
775
776    /// Get genotypes as vector of one `Genotype` per sample.
777    ///
778    /// # Example
779    /// Parsing genotype field (`GT` tag) from a VCF record:
780    /// ```
781    /// use crate::rust_htslib::bcf::{Reader, Read};
782    /// let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
783    /// let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
784    /// for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
785    ///     let mut rec = rec.expect("Error reading record.");
786    ///     let genotypes = rec.genotypes().expect("Error reading genotypes");
787    ///     assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
788    /// }
789    /// ```
790    pub fn genotypes(&self) -> Result<Genotypes<'_, Buffer>> {
791        self.genotypes_shared_buffer(Buffer::new())
792    }
793
794    /// Get genotypes as vector of one `Genotype` per sample, using a given shared buffer
795    /// to avoid unnecessary allocations.
796    pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result<Genotypes<'a, B>>
797    where
798        B: BorrowMut<Buffer> + Borrow<Buffer> + 'a,
799    {
800        Ok(Genotypes {
801            encoded: self.format_shared_buffer(b"GT", buffer).integer()?,
802        })
803    }
804
805    /// Retrieve data for a `FORMAT` field
806    ///
807    /// # Example
808    /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
809    /// for an example of the setup used here.*
810    ///
811    /// ```rust
812    /// # use rust_htslib::bcf::{Format, Writer};
813    /// # use rust_htslib::bcf::header::Header;
814    /// #
815    /// # // Create minimal VCF header with a single sample
816    /// # let mut header = Header::new();
817    /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
818    /// #
819    /// # // Write uncompressed VCF to stdout with above header and get an empty record
820    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
821    /// # let mut record = vcf.empty_record();
822    /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
823    ///
824    /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
825    /// let sample1_depth = read_depths[0];
826    /// assert_eq!(sample1_depth, &[20]);
827    /// let sample2_depth = read_depths[1];
828    /// assert_eq!(sample2_depth, &[12])
829    /// ```
830    ///
831    /// # Errors
832    /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
833    /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
    /// accessed. If parts of the data are accessed after the `BufferBacked` object has been
    /// dropped, you will access unallocated memory.
836    pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
837        self.format_shared_buffer(tag, Buffer::new())
838    }
839
840    /// Get the value of the given format tag for each sample.
841    pub fn format_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
842        &'a self,
843        tag: &'a [u8],
844        buffer: B,
845    ) -> Format<'a, B> {
846        Format::new(self, tag, buffer)
847    }
848
849    /// Add/replace an integer-typed FORMAT tag.
850    ///
851    /// # Arguments
852    ///
853    /// - `tag` - The tag's string.
854    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
855    ///   for each sample.
856    ///
857    /// # Errors
858    ///
859    /// Returns error if tag is not present in header.
860    pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
861        self.push_format(tag, data, htslib::BCF_HT_INT)
862    }
863
864    /// Add/replace a float-typed FORMAT tag.
865    ///
866    /// # Arguments
867    ///
868    /// - `tag` - The tag's string.
869    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
870    ///   for each sample.
871    ///
872    /// # Errors
873    ///
874    /// Returns error if tag is not present in header.
875    ///
876    /// # Example
877    ///
878    /// Example assumes we have a Record `record` from a VCF with an `AF` `FORMAT` tag.
879    /// See [module documentation](../index.html#example-writing) for how to set up
880    /// VCF, header, and record.
881    ///
882    /// ```
883    /// # use rust_htslib::bcf::{Format, Writer};
884    /// # use rust_htslib::bcf::header::Header;
885    /// # use rust_htslib::bcf::record::GenotypeAllele;
886    /// # let mut header = Header::new();
887    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
888    /// # header.push_record(header_contig_line.as_bytes());
889    /// # let header_af_line = r#"##FORMAT=<ID=AF,Number=1,Type=Float,Description="Frequency">"#;
890    /// # header.push_record(header_af_line.as_bytes());
891    /// # header.push_sample("test_sample".as_bytes());
892    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
893    /// # let mut record = vcf.empty_record();
894    /// record.push_format_float(b"AF", &[0.5]);
895    /// assert_eq!(0.5, record.format(b"AF").float().unwrap()[0][0]);
896    /// ```
897    pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
898        self.push_format(tag, data, htslib::BCF_HT_REAL)
899    }
900
901    /// Add/replace a single-char-typed FORMAT tag.
902    ///
903    /// # Arguments
904    ///
905    /// - `tag` - The tag's string.
906    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
907    ///   for each sample.
908    ///
909    /// # Errors
910    ///
911    /// Returns error if tag is not present in header.
912    pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<()> {
913        self.push_format(tag, data, htslib::BCF_HT_STR)
914    }
915
916    /// Add a format tag. Data is a flattened two-dimensional array.
917    /// The first dimension contains one array for each sample.
918    fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
919        let tag_c_str = ffi::CString::new(tag).unwrap();
920        unsafe {
921            if htslib::bcf_update_format(
922                self.header().inner,
923                self.inner,
924                tag_c_str.as_ptr() as *mut c_char,
925                data.as_ptr() as *const ::std::os::raw::c_void,
926                data.len() as i32,
927                ht as i32,
928            ) == 0
929            {
930                Ok(())
931            } else {
932                Err(Error::BcfSetTag {
933                    tag: str::from_utf8(tag).unwrap().to_owned(),
934                })
935            }
936        }
937    }
938
939    // TODO: should we add convenience methods clear_format_*?
940
941    /// Add a string-typed FORMAT tag. Note that genotypes are treated as a special case
942    /// and cannot be added with this method. See instead [push_genotypes](#method.push_genotypes).
943    ///
944    /// # Arguments
945    ///
946    /// - `tag` - The tag's string.
947    /// - `data` - a two-dimensional array, the first dimension contains one array
948    ///   for each sample. Must be non-empty.
949    ///
950    /// # Errors
951    ///
952    /// Returns error if tag is not present in header.
953    pub fn push_format_string<D: Borrow<[u8]>>(&mut self, tag: &[u8], data: &[D]) -> Result<()> {
954        assert!(
955            !data.is_empty(),
956            "given string data must have at least 1 element"
957        );
958        let c_data = data
959            .iter()
960            .map(|s| ffi::CString::new(s.borrow()).unwrap())
961            .collect::<Vec<ffi::CString>>();
962        let c_ptrs = c_data
963            .iter()
964            .map(|s| s.as_ptr() as *mut i8)
965            .collect::<Vec<*mut i8>>();
966        let tag_c_str = ffi::CString::new(tag).unwrap();
967        unsafe {
968            if htslib::bcf_update_format_string(
969                self.header().inner,
970                self.inner,
971                tag_c_str.as_ptr() as *mut c_char,
972                c_ptrs.as_slice().as_ptr() as *mut *const c_char,
973                data.len() as i32,
974            ) == 0
975            {
976                Ok(())
977            } else {
978                Err(Error::BcfSetTag {
979                    tag: str::from_utf8(tag).unwrap().to_owned(),
980                })
981            }
982        }
983    }
984
985    /// Add/replace an integer-typed INFO entry.
986    pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
987        self.push_info(tag, data, htslib::BCF_HT_INT)
988    }
989
990    /// Remove the integer-typed INFO entry.
991    pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<()> {
992        self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
993    }
994
995    /// Add/replace a float-typed INFO entry.
996    pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
997        self.push_info(tag, data, htslib::BCF_HT_REAL)
998    }
999
1000    /// Remove the float-typed INFO entry.
1001    pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<()> {
1002        self.push_info::<u8>(tag, &[], htslib::BCF_HT_REAL)
1003    }
1004
1005    /// Add/replace an INFO tag.
1006    ///
1007    /// # Arguments
1008    /// * `tag` - the tag to add/replace
1009    /// * `data` - the data to set
1010    /// * `ht` - the HTSLib type to use
1011    fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
1012        let tag_c_str = ffi::CString::new(tag).unwrap();
1013        unsafe {
1014            if htslib::bcf_update_info(
1015                self.header().inner,
1016                self.inner,
1017                tag_c_str.as_ptr() as *mut c_char,
1018                data.as_ptr() as *const ::std::os::raw::c_void,
1019                data.len() as i32,
1020                ht as i32,
1021            ) == 0
1022            {
1023                Ok(())
1024            } else {
1025                Err(Error::BcfSetTag {
1026                    tag: str::from_utf8(tag).unwrap().to_owned(),
1027                })
1028            }
1029        }
1030    }
1031
1032    /// Set flag into the INFO column.
1033    pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1034        self.push_info_string_impl(tag, &[b""], htslib::BCF_HT_FLAG)
1035    }
1036
1037    /// Remove the flag from the INFO column.
1038    pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1039        self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
1040    }
1041
1042    /// Add/replace a string-typed INFO entry.
1043    pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<()> {
1044        self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
1045    }
1046
1047    /// Remove the string field from the INFO column.
1048    pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<()> {
1049        self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
1050    }
1051
1052    /// Add an string-valued INFO tag.
1053    fn push_info_string_impl(&mut self, tag: &[u8], data: &[&[u8]], ht: u32) -> Result<()> {
1054        let mut buf: Vec<u8> = Vec::new();
1055        for (i, &s) in data.iter().enumerate() {
1056            if i > 0 {
1057                buf.extend(b",");
1058            }
1059            buf.extend(s);
1060        }
1061        let c_str = ffi::CString::new(buf).unwrap();
1062        let len = if ht == htslib::BCF_HT_FLAG {
1063            data.len()
1064        } else {
1065            c_str.to_bytes().len()
1066        };
1067        let tag_c_str = ffi::CString::new(tag).unwrap();
1068        unsafe {
1069            if htslib::bcf_update_info(
1070                self.header().inner,
1071                self.inner,
1072                tag_c_str.as_ptr() as *mut c_char,
1073                c_str.as_ptr() as *const ::std::os::raw::c_void,
1074                len as i32,
1075                ht as i32,
1076            ) == 0
1077            {
1078                Ok(())
1079            } else {
1080                Err(Error::BcfSetTag {
1081                    tag: str::from_utf8(tag).unwrap().to_owned(),
1082                })
1083            }
1084        }
1085    }
1086
1087    /// Remove unused alleles.
1088    pub fn trim_alleles(&mut self) -> Result<()> {
1089        match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
1090            -1 => Err(Error::BcfRemoveAlleles),
1091            _ => Ok(()),
1092        }
1093    }
1094
1095    pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> {
1096        let rm_set = unsafe { htslib::kbs_init(remove.len()) };
1097
1098        for (i, &r) in remove.iter().enumerate() {
1099            if r {
1100                unsafe {
1101                    htslib::kbs_insert(rm_set, i as i32);
1102                }
1103            }
1104        }
1105
1106        let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
1107
1108        unsafe {
1109            htslib::kbs_destroy(rm_set);
1110        }
1111
1112        match ret {
1113            -1 => Err(Error::BcfRemoveAlleles),
1114            _ => Ok(()),
1115        }
1116    }
1117
1118    /// Get the length of the reference allele. If the record has no reference allele, then the
1119    /// result will be `0`.
1120    ///
1121    /// # Example
1122    /// ```rust
1123    /// # use rust_htslib::bcf::{Format, Writer};
1124    /// # use rust_htslib::bcf::header::Header;
1125    /// #
1126    /// # // Create minimal VCF header with a single sample
1127    /// # let mut header = Header::new();
1128    /// # header.push_sample("sample".as_bytes());
1129    /// #
1130    /// # // Write uncompressed VCF to stdout with above header and get an empty record
1131    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1132    /// # let mut record = vcf.empty_record();
1133    /// # assert_eq!(record.rlen(), 0);
1134    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1135    /// record.set_alleles(alleles).expect("Failed to set alleles");
1136    /// assert_eq!(record.rlen(), 3)
1137    /// ```
1138    pub fn rlen(&self) -> i64 {
1139        self.inner().rlen
1140    }
1141
1142    /// Clear all parts of the record. Useful if you plan to reuse a record object multiple times.
1143    ///
1144    /// # Example
1145    /// ```rust
1146    /// # use rust_htslib::bcf::{Format, Writer};
1147    /// # use rust_htslib::bcf::header::Header;
1148    /// #
1149    /// # // Create minimal VCF header with a single sample
1150    /// # let mut header = Header::new();
1151    /// # header.push_sample("sample".as_bytes());
1152    /// #
1153    /// # // Write uncompressed VCF to stdout with above header and get an empty record
1154    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1155    /// # let mut record = vcf.empty_record();
1156    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1157    /// record.set_alleles(alleles).expect("Failed to set alleles");
1158    /// record.set_pos(6);
1159    /// record.clear();
1160    /// assert_eq!(record.rlen(), 0);
1161    /// assert_eq!(record.pos(), 0)
1162    /// ```
1163    pub fn clear(&self) {
1164        unsafe { htslib::bcf_clear(self.inner) }
1165    }
1166
1167    /// Provide short description of record for locating it in the BCF/VCF file.
1168    pub fn desc(&self) -> String {
1169        if let Some(rid) = self.rid() {
1170            if let Ok(contig) = self.header.rid2name(rid) {
1171                return format!("{}:{}", str::from_utf8(contig).unwrap(), self.pos());
1172            }
1173        }
1174        "".to_owned()
1175    }
1176
1177    /// Convert to VCF String
1178    ///
1179    /// Intended for debug only. Use Writer for efficient VCF output.
1180    ///
1181    pub fn to_vcf_string(&self) -> Result<String> {
1182        let mut buf = htslib::kstring_t {
1183            l: 0,
1184            m: 0,
1185            s: ptr::null_mut(),
1186        };
1187        let ret = unsafe { htslib::vcf_format(self.header().inner, self.inner, &mut buf) };
1188
1189        if ret < 0 {
1190            if !buf.s.is_null() {
1191                unsafe {
1192                    libc::free(buf.s as *mut libc::c_void);
1193                }
1194            }
1195            return Err(Error::BcfToString);
1196        }
1197
1198        let vcf_str = unsafe {
1199            let vcf_str = String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap());
1200            if !buf.s.is_null() {
1201                libc::free(buf.s as *mut libc::c_void);
1202            }
1203            vcf_str
1204        };
1205
1206        Ok(vcf_str)
1207    }
1208}
1209
1210impl Clone for Record {
1211    fn clone(&self) -> Self {
1212        let inner = unsafe { htslib::bcf_dup(self.inner) };
1213        Record {
1214            inner,
1215            header: self.header.clone(),
1216        }
1217    }
1218}
1219
1220impl genome::AbstractLocus for Record {
1221    fn contig(&self) -> &str {
1222        str::from_utf8(
1223            self.header()
1224                .rid2name(self.rid().expect("rid not set"))
1225                .expect("unable to find rid in header"),
1226        )
1227        .expect("unable to interpret contig name as UTF-8")
1228    }
1229
1230    fn pos(&self) -> u64 {
1231        self.pos() as u64
1232    }
1233}
1234
/// Phased or unphased alleles, represented as indices.
///
/// The integer encoding used by the conversions below is `((index + 1) << 1) | phased`,
/// where a missing allele uses index `-1`; hence `0` is "unphased missing" and `1` is
/// "phased missing".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
    Unphased(i32),
    Phased(i32),
    UnphasedMissing,
    PhasedMissing,
}

impl GenotypeAllele {
    /// Decode given integer according to BCF standard.
    #[deprecated(
        since = "0.36.0",
        note = "Please use the conversion trait From<i32> for GenotypeAllele instead."
    )]
    pub fn from_encoded(encoded: i32) -> Self {
        // Single source of truth for decoding lives in `From<i32>`.
        Self::from(encoded)
    }

    /// Get the index into the list of alleles.
    ///
    /// Returns `None` for the two missing variants.
    pub fn index(self) -> Option<u32> {
        match self {
            GenotypeAllele::Unphased(i) | GenotypeAllele::Phased(i) => Some(i as u32),
            GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
        }
    }
}

impl fmt::Display for GenotypeAllele {
    /// Write the allele index, or `.` for a missing allele.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.index() {
            Some(a) => write!(f, "{}", a),
            None => write!(f, "."),
        }
    }
}

impl From<GenotypeAllele> for i32 {
    /// Encode the allele as `((index + 1) << 1) | phased`.
    fn from(allele: GenotypeAllele) -> i32 {
        let (allele, phased) = match allele {
            GenotypeAllele::UnphasedMissing => (-1, 0),
            GenotypeAllele::PhasedMissing => (-1, 1),
            GenotypeAllele::Unphased(a) => (a, 0),
            GenotypeAllele::Phased(a) => (a, 1),
        };
        ((allele + 1) << 1) | phased
    }
}

impl From<i32> for GenotypeAllele {
    /// Decode an integer: the lowest bit is the phasing flag, the remaining bits hold
    /// `index + 1`.
    fn from(encoded: i32) -> GenotypeAllele {
        match encoded {
            0 => GenotypeAllele::UnphasedMissing,
            1 => GenotypeAllele::PhasedMissing,
            // The lowest bit is either set or not, so these two arms are exhaustive.
            e if e & 1 == 1 => GenotypeAllele::Phased((e >> 1) - 1),
            e => GenotypeAllele::Unphased((e >> 1) - 1),
        }
    }
}
1301
// The struct is declared inside `custom_derive!` so that the `NewtypeDeref` derive below
// can be applied to it.
custom_derive! {
    /// Genotype representation as a vector of `GenotypeAllele`.
    #[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Genotype(Vec<GenotypeAllele>);
}
1307
1308impl fmt::Display for Genotype {
1309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1310        let Genotype(alleles) = self;
1311        write!(f, "{}", alleles[0])?;
1312        for a in &alleles[1..] {
1313            let sep = match a {
1314                GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
1315                GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
1316            };
1317            write!(f, "{}{}", sep, a)?;
1318        }
1319        Ok(())
1320    }
1321}
1322
/// Lazy representation of genotypes, that does no computation until a particular genotype is queried.
///
/// Holds the still-encoded integer values per sample; decoding into `GenotypeAllele`s
/// happens in `get`.
#[derive(Debug)]
pub struct Genotypes<'a, B>
where
    B: Borrow<Buffer> + 'a,
{
    /// One slice of encoded allele integers per sample, indexed by sample position.
    encoded: BufferBacked<'a, Vec<&'a [i32]>, B>,
}
1331
1332impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1333    /// Get genotype of ith sample.
1334    ///
1335    /// Note that the result complies with the BCF spec. This means that the
1336    /// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
1337    /// this method will return `[Unphased(1), Phased(1)]`.
1338    pub fn get(&self, i: usize) -> Genotype {
1339        let igt = self.encoded[i];
1340        let allelles = igt
1341            .iter()
1342            .take_while(|&&i| i != VECTOR_END_INTEGER)
1343            .map(|&i| GenotypeAllele::from(i))
1344            .collect();
1345        Genotype(allelles)
1346    }
1347}
1348
1349impl Drop for Record {
1350    fn drop(&mut self) {
1351        unsafe { htslib::bcf_destroy(self.inner) };
1352    }
1353}
1354
// NOTE(review): these impls are a manual promise to the compiler; presumably they are
// needed because `Record` stores a raw htslib pointer. Soundness relies on htslib records
// not being tied to a thread and on callers not mutating one record concurrently
// (note that `Record::clear` takes `&self`) — confirm.
unsafe impl Send for Record {}

unsafe impl Sync for Record {}
1358
/// Info tag representation.
///
/// Values are only fetched from the record when one of the typed accessors
/// (`integer`, `float`, `flag`, `string`) is called.
#[derive(Debug)]
pub struct Info<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record the tag is read from.
    record: &'a Record,
    /// Name of the INFO tag as raw bytes.
    tag: &'a [u8],
    /// Buffer that htslib writes the tag's values into.
    buffer: B,
}
1366
/// Optional, buffer-backed list of byte strings; this is the success type of
/// `Info::string`, where `None` means the tag is not present in the record.
pub type BufferBackedOption<'b, B> = Option<BufferBacked<'b, Vec<&'b [u8]>, B>>;
1368
1369impl<'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Info<'_, B> {
1370    /// Short description of info tag.
1371    pub fn desc(&self) -> String {
1372        str::from_utf8(self.tag).unwrap().to_owned()
1373    }
1374
1375    fn data(&mut self, data_type: u32) -> Result<Option<i32>> {
1376        let mut n: i32 = self.buffer.borrow().len;
1377        let c_str = ffi::CString::new(self.tag).unwrap();
1378        let ret = unsafe {
1379            htslib::bcf_get_info_values(
1380                self.record.header().inner,
1381                self.record.inner,
1382                c_str.as_ptr() as *mut c_char,
1383                &mut self.buffer.borrow_mut().inner,
1384                &mut n,
1385                data_type as i32,
1386            )
1387        };
1388        self.buffer.borrow_mut().len = n;
1389
1390        match ret {
1391            -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1392            -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1393            -3 => Ok(None),
1394            ret => Ok(Some(ret)),
1395        }
1396    }
1397
1398    /// Get integers from tag. `None` if tag not present in record.
1399    ///
1400    /// Import `bcf::record::Numeric` for missing value handling.
1401    ///
1402    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1403    /// as along as the data is accessed. If parts of the data are accessed while
1404    /// the BufferBacked object is already dropped, you will access unallocated
1405    /// memory.
1406    pub fn integer(mut self) -> Result<Option<BufferBacked<'b, &'b [i32], B>>> {
1407        self.data(htslib::BCF_HT_INT).map(|data| {
1408            data.map(|ret| {
1409                let values = unsafe {
1410                    slice::from_raw_parts(self.buffer.borrow().inner as *const i32, ret as usize)
1411                };
1412                BufferBacked::new(&values[..ret as usize], self.buffer)
1413            })
1414        })
1415    }
1416
1417    /// Get floats from tag. `None` if tag not present in record.
1418    ///
1419    /// Import `bcf::record::Numeric` for missing value handling.
1420    ///
1421    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1422    /// as along as the data is accessed. If parts of the data are accessed while
1423    /// the BufferBacked object is already dropped, you will access unallocated
1424    /// memory.
1425    pub fn float(mut self) -> Result<Option<BufferBacked<'b, &'b [f32], B>>> {
1426        self.data(htslib::BCF_HT_REAL).map(|data| {
1427            data.map(|ret| {
1428                let values = unsafe {
1429                    slice::from_raw_parts(self.buffer.borrow().inner as *const f32, ret as usize)
1430                };
1431                BufferBacked::new(&values[..ret as usize], self.buffer)
1432            })
1433        })
1434    }
1435
1436    /// Get flags from tag. `false` if not set.
1437    pub fn flag(&mut self) -> Result<bool> {
1438        self.data(htslib::BCF_HT_FLAG).map(|data| match data {
1439            Some(ret) => ret == 1,
1440            None => false,
1441        })
1442    }
1443
1444    /// Get strings from tag. `None` if tag not present in record.
1445    ///
1446    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1447    /// as along as the data is accessed. If parts of the data are accessed while
1448    /// the BufferBacked object is already dropped, you will access unallocated
1449    /// memory.
1450    pub fn string(mut self) -> Result<BufferBackedOption<'b, B>> {
1451        self.data(htslib::BCF_HT_STR).map(|data| {
1452            data.map(|ret| {
1453                BufferBacked::new(
1454                    unsafe {
1455                        slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize)
1456                    }
1457                    .split(|c| *c == b',')
1458                    .map(|s| {
1459                        // stop at zero character
1460                        s.split(|c| *c == 0u8)
1461                            .next()
1462                            .expect("Bug: returned string should not be empty.")
1463                    })
1464                    .collect(),
1465                    self.buffer,
1466                )
1467            })
1468        })
1469    }
1470}
1471
// NOTE(review): these impls hold for every `B`, including buffer types that are not
// themselves `Send`/`Sync` — confirm that this is intended.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Info<'_, B> {}

unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Info<'_, B> {}
1475
1476fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
1477    s.split(|v| v.is_vector_end())
1478        .next()
1479        .expect("Bug: returned slice should not be empty.")
1480}
1481
/// Representation of per-sample data.
///
/// Values are only decoded when one of the typed accessors (`integer`, `float`,
/// `string`) is called.
#[derive(Debug)]
pub struct Format<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record the tag is read from.
    record: &'a Record,
    /// Name of the FORMAT tag as raw bytes.
    tag: &'a [u8],
    /// Raw pointer returned by `bcf_get_fmt` for this tag.
    /// NOTE(review): presumably null when the record has no such tag — confirm.
    inner: *mut htslib::bcf_fmt_t,
    /// Buffer that htslib writes the tag's values into.
    buffer: B,
}
1490
1491impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
1492    /// Create new format data in a given record.
1493    fn new(record: &'a Record, tag: &'a [u8], buffer: B) -> Format<'a, B> {
1494        let c_str = ffi::CString::new(tag).unwrap();
1495        let inner = unsafe {
1496            htslib::bcf_get_fmt(
1497                record.header().inner,
1498                record.inner,
1499                c_str.as_ptr() as *mut c_char,
1500            )
1501        };
1502        Format {
1503            record,
1504            tag,
1505            inner,
1506            buffer,
1507        }
1508    }
1509
1510    /// Provide short description of format entry (just the tag name).
1511    pub fn desc(&self) -> String {
1512        str::from_utf8(self.tag).unwrap().to_owned()
1513    }
1514
1515    pub fn inner(&self) -> &htslib::bcf_fmt_t {
1516        unsafe { &*self.inner }
1517    }
1518
1519    pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
1520        unsafe { &mut *self.inner }
1521    }
1522
1523    fn values_per_sample(&self) -> usize {
1524        self.inner().n as usize
1525    }
1526
1527    /// Read and decode format data into a given type.
1528    fn data(&mut self, data_type: u32) -> Result<i32> {
1529        let mut n: i32 = self.buffer.borrow().len;
1530        let c_str = ffi::CString::new(self.tag).unwrap();
1531        let ret = unsafe {
1532            htslib::bcf_get_format_values(
1533                self.record.header().inner,
1534                self.record.inner,
1535                c_str.as_ptr() as *mut c_char,
1536                &mut self.buffer.borrow_mut().inner,
1537                &mut n,
1538                data_type as i32,
1539            )
1540        };
1541        self.buffer.borrow_mut().len = n;
1542        match ret {
1543            -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1544            -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1545            -3 => Err(Error::BcfMissingTag {
1546                tag: self.desc(),
1547                record: self.record.desc(),
1548            }),
1549            ret => Ok(ret),
1550        }
1551    }
1552
1553    /// Get format data as integers.
1554    ///
1555    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1556    /// as long as the data is accessed. If parts of the data are accessed while
1557    /// the BufferBacked object is already dropped, you will access unallocated
1558    /// memory.
1559    pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
1560        self.data(htslib::BCF_HT_INT).map(|ret| {
1561            BufferBacked::new(
1562                unsafe {
1563                    slice::from_raw_parts(
1564                        self.buffer.borrow_mut().inner as *const i32,
1565                        ret as usize,
1566                    )
1567                }
1568                .chunks(self.values_per_sample())
1569                .map(trim_slice)
1570                .collect(),
1571                self.buffer,
1572            )
1573        })
1574    }
1575
1576    /// Get format data as floats.
1577    ///
1578    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1579    /// as along as the data is accessed. If parts of the data are accessed while
1580    /// the BufferBacked object is already dropped, you will access unallocated
1581    /// memory.
1582    pub fn float(mut self) -> Result<BufferBacked<'b, Vec<&'b [f32]>, B>> {
1583        self.data(htslib::BCF_HT_REAL).map(|ret| {
1584            BufferBacked::new(
1585                unsafe {
1586                    slice::from_raw_parts(
1587                        self.buffer.borrow_mut().inner as *const f32,
1588                        ret as usize,
1589                    )
1590                }
1591                .chunks(self.values_per_sample())
1592                .map(trim_slice)
1593                .collect(),
1594                self.buffer,
1595            )
1596        })
1597    }
1598
1599    /// Get format data as byte slices. To obtain the values strings, use `std::str::from_utf8`.
1600    ///
1601    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1602    /// as along as the data is accessed. If parts of the data are accessed while
1603    /// the BufferBacked object is already dropped, you will access unallocated
1604    /// memory.
1605    pub fn string(mut self) -> Result<BufferBacked<'b, Vec<&'b [u8]>, B>> {
1606        self.data(htslib::BCF_HT_STR).map(|ret| {
1607            if ret == 0 {
1608                return BufferBacked::new(Vec::new(), self.buffer);
1609            }
1610            BufferBacked::new(
1611                unsafe {
1612                    slice::from_raw_parts(self.buffer.borrow_mut().inner as *const u8, ret as usize)
1613                }
1614                .chunks(self.values_per_sample())
1615                .map(|s| {
1616                    // stop at zero character
1617                    s.split(|c| *c == 0u8)
1618                        .next()
1619                        .expect("Bug: returned string should not be empty.")
1620                })
1621                .collect(),
1622                self.buffer,
1623            )
1624        })
1625    }
1626}
1627
// `Format` stores a raw `*mut htslib::bcf_fmt_t`, so these traits have to be asserted
// manually.
// NOTE(review): the impls hold for every `B`, including buffer types that are not
// themselves `Send`/`Sync`, and `inner_mut` hands out mutable access to the pointee —
// confirm that this is intended.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Format<'_, B> {}

unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Format<'_, B> {}
1631
/// Iterator over the filter ids stored in a record.
#[derive(Debug)]
pub struct Filters<'a> {
    /// Reference to the `Record` to enumerate filters for.
    record: &'a Record,
    /// Index of the next filter to return, if not at end.
    idx: i32,
}
1639
1640impl<'a> Filters<'a> {
1641    pub fn new(record: &'a Record) -> Self {
1642        Filters { record, idx: 0 }
1643    }
1644}
1645
1646impl Iterator for Filters<'_> {
1647    type Item = Id;
1648
1649    fn next(&mut self) -> Option<Id> {
1650        if self.record.inner().d.n_flt <= self.idx {
1651            None
1652        } else {
1653            let i = self.idx as isize;
1654            self.idx += 1;
1655            Some(Id(unsafe { *self.record.inner().d.flt.offset(i) } as u32))
1656        }
1657    }
1658}
1659
1660#[cfg(test)]
1661mod tests {
1662    use super::*;
1663    use crate::bcf::{Format, Header, Writer};
1664    use tempfile::NamedTempFile;
1665
1666    #[test]
1667    fn test_missing_float() {
1668        let expected: u32 = 0x7F80_0001;
1669        assert_eq!(MISSING_FLOAT.bits(), expected);
1670    }
1671
1672    #[test]
1673    fn test_vector_end_float() {
1674        let expected: u32 = 0x7F80_0002;
1675        assert_eq!(VECTOR_END_FLOAT.bits(), expected);
1676    }
1677
1678    #[test]
1679    fn test_record_rlen() {
1680        let tmp = NamedTempFile::new().unwrap();
1681        let path = tmp.path();
1682        let header = Header::new();
1683        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1684        let mut record = vcf.empty_record();
1685        assert_eq!(record.rlen(), 0);
1686        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1687        record.set_alleles(alleles).expect("Failed to set alleles");
1688        assert_eq!(record.rlen(), 3)
1689    }
1690
1691    #[test]
1692    fn test_record_end() {
1693        let tmp = NamedTempFile::new().unwrap();
1694        let path = tmp.path();
1695        let header = Header::new();
1696        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1697        let mut record = vcf.empty_record();
1698        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1699        record.set_alleles(alleles).expect("Failed to set alleles");
1700        record.set_pos(5);
1701
1702        assert_eq!(record.end(), 8)
1703    }
1704
1705    #[test]
1706    fn test_record_clear() {
1707        let tmp = NamedTempFile::new().unwrap();
1708        let path = tmp.path();
1709        let mut header = Header::new();
1710        header.push_sample("sample".as_bytes());
1711        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1712        let mut record = vcf.empty_record();
1713        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1714        record.set_alleles(alleles).expect("Failed to set alleles");
1715        record.set_pos(6);
1716        record.clear();
1717
1718        assert_eq!(record.rlen(), 0);
1719        assert_eq!(record.sample_count(), 0);
1720        assert_eq!(record.pos(), 0)
1721    }
1722
1723    #[test]
1724    fn test_record_clone() {
1725        let tmp = NamedTempFile::new().unwrap();
1726        let path = tmp.path();
1727        let header = Header::new();
1728        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1729        let mut record = vcf.empty_record();
1730        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1731        record.set_alleles(alleles).expect("Failed to set alleles");
1732        record.set_pos(6);
1733
1734        let mut cloned_record = record.clone();
1735        cloned_record.set_pos(5);
1736
1737        assert_eq!(record.pos(), 6);
1738        assert_eq!(record.allele_count(), 2);
1739        assert_eq!(cloned_record.pos(), 5);
1740        assert_eq!(cloned_record.allele_count(), 2);
1741    }
1742
1743    #[test]
1744    fn test_record_has_filter_pass_is_default() {
1745        let tmp = NamedTempFile::new().unwrap();
1746        let path = tmp.path();
1747        let header = Header::new();
1748        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1749        let record = vcf.empty_record();
1750
1751        assert!(record.has_filter("PASS".as_bytes()));
1752        assert!(record.has_filter(".".as_bytes()));
1753        assert!(record.has_filter(&Id(0)));
1754        assert!(!record.has_filter("foo".as_bytes()));
1755        assert!(!record.has_filter(&Id(2)));
1756    }
1757
1758    #[test]
1759    fn test_record_has_filter_custom() {
1760        let tmp = NamedTempFile::new().unwrap();
1761        let path = tmp.path();
1762        let mut header = Header::new();
1763        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1764        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1765        let mut record = vcf.empty_record();
1766        record.push_filter("foo".as_bytes()).unwrap();
1767
1768        assert!(record.has_filter("foo".as_bytes()));
1769        assert!(!record.has_filter("PASS".as_bytes()))
1770    }
1771
1772    #[test]
1773    fn test_record_push_filter() {
1774        let tmp = NamedTempFile::new().unwrap();
1775        let path = tmp.path();
1776        let mut header = Header::new();
1777        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1778        header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
1779        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1780        let mut record = vcf.empty_record();
1781        assert!(record.has_filter("PASS".as_bytes()));
1782        record.push_filter("foo".as_bytes()).unwrap();
1783        let bar = record.header().name_to_id(b"bar").unwrap();
1784        record.push_filter(&bar).unwrap();
1785        assert!(record.has_filter("foo".as_bytes()));
1786        assert!(record.has_filter(&bar));
1787        assert!(!record.has_filter("PASS".as_bytes()));
1788        assert!(record.push_filter("baz".as_bytes()).is_err())
1789    }
1790
1791    #[test]
1792    fn test_record_set_filters() {
1793        let tmp = NamedTempFile::new().unwrap();
1794        let path = tmp.path();
1795        let mut header = Header::new();
1796        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1797        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
1798        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1799        let mut record = vcf.empty_record();
1800        assert!(record.has_filter("PASS".as_bytes()));
1801        record
1802            .set_filters(&["foo".as_bytes(), "bar".as_bytes()])
1803            .unwrap();
1804        assert!(record.has_filter("foo".as_bytes()));
1805        assert!(record.has_filter("bar".as_bytes()));
1806        assert!(!record.has_filter("PASS".as_bytes()));
1807        let filters: &[&Id] = &[];
1808        record.set_filters(filters).unwrap();
1809        assert!(record.has_filter("PASS".as_bytes()));
1810        assert!(!record.has_filter("foo".as_bytes()));
1811        assert!(record
1812            .set_filters(&["foo".as_bytes(), "baz".as_bytes()])
1813            .is_err())
1814    }
1815
1816    #[test]
1817    fn test_record_remove_filter() {
1818        let tmp = NamedTempFile::new().unwrap();
1819        let path = tmp.path();
1820        let mut header = Header::new();
1821        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1822        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
1823        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1824        let mut record = vcf.empty_record();
1825        let foo = record.header().name_to_id(b"foo").unwrap();
1826        let bar = record.header().name_to_id(b"bar").unwrap();
1827        record.set_filters(&[&foo, &bar]).unwrap();
1828        assert!(record.has_filter(&foo));
1829        assert!(record.has_filter(&bar));
1830        record.remove_filter(&foo, true).unwrap();
1831        assert!(!record.has_filter(&foo));
1832        assert!(record.has_filter(&bar));
1833        assert!(record.remove_filter("baz".as_bytes(), true).is_err());
1834        record.remove_filter(&bar, true).unwrap();
1835        assert!(!record.has_filter(&bar));
1836        assert!(record.has_filter("PASS".as_bytes()));
1837    }
1838
1839    #[test]
1840    fn test_record_to_vcf_string_err() {
1841        let tmp = NamedTempFile::new().unwrap();
1842        let path = tmp.path();
1843        let header = Header::new();
1844        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1845        let record = vcf.empty_record();
1846        assert!(record.to_vcf_string().is_err());
1847    }
1848
1849    #[test]
1850    fn test_record_to_vcf_string() {
1851        let tmp = NamedTempFile::new().unwrap();
1852        let path = tmp.path();
1853        let mut header = Header::new();
1854        header.push_record(b"##contig=<ID=chr1,length=1000>");
1855        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1856        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1857        let mut record = vcf.empty_record();
1858        record.push_filter("foo".as_bytes()).unwrap();
1859        assert_eq!(
1860            record.to_vcf_string().unwrap(),
1861            "chr1\t1\t.\t.\t.\t0\tfoo\t.\n"
1862        );
1863    }
1864}