rust_htslib/bcf/
record.rs

1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::{Borrow, BorrowMut};
7use std::fmt;
8use std::marker::PhantomData;
9use std::ops::Deref;
10use std::os::raw::c_char;
11use std::ptr;
12use std::rc::Rc;
13use std::slice;
14use std::str;
15use std::{ffi, iter};
16
17use bio_types::genome;
18use derive_new::new;
19use ieee754::Ieee754;
20use lazy_static::lazy_static;
21
22use crate::bcf::header::{HeaderView, Id};
23use crate::bcf::Error;
24use crate::errors::Result;
25use crate::htslib;
26
// Sentinel values for integer entries: `MISSING_INTEGER` is what `Numeric::is_missing`
// tests for, `VECTOR_END_INTEGER` is what `NumericUtils::is_vector_end` tests for.
const MISSING_INTEGER: i32 = i32::MIN;
const VECTOR_END_INTEGER: i32 = i32::MIN + 1;

lazy_static! {
    // Float counterparts of the integer sentinels. Both bit patterns have an all-ones
    // exponent and a non-zero mantissa, i.e. they are NaNs, which is why the code in this
    // file compares them through `bits()` rather than with `==`.
    static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F80_0001);
    static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F80_0002);
}
34
/// Common methods for numeric INFO and FORMAT entries.
///
/// Implemented in this file for `f32` and `i32`.
pub trait Numeric {
    /// Return true if entry is a missing value, i.e. it equals the sentinel produced by
    /// [`Numeric::missing`].
    fn is_missing(&self) -> bool;

    /// Return missing value for storage in BCF record.
    fn missing() -> Self;
}
43
44impl Numeric for f32 {
45    fn is_missing(&self) -> bool {
46        self.bits() == MISSING_FLOAT.bits()
47    }
48
49    fn missing() -> f32 {
50        *MISSING_FLOAT
51    }
52}
53
54impl Numeric for i32 {
55    fn is_missing(&self) -> bool {
56        *self == MISSING_INTEGER
57    }
58
59    fn missing() -> i32 {
60        MISSING_INTEGER
61    }
62}
63
/// Module-private companion to [`Numeric`] for detecting the vector-end sentinel.
trait NumericUtils {
    /// Return true if entry is the vector-end sentinel (`VECTOR_END_INTEGER` for `i32`,
    /// `VECTOR_END_FLOAT` for `f32`).
    fn is_vector_end(&self) -> bool;
}
68
69impl NumericUtils for f32 {
70    fn is_vector_end(&self) -> bool {
71        self.bits() == VECTOR_END_FLOAT.bits()
72    }
73}
74
75impl NumericUtils for i32 {
76    fn is_vector_end(&self) -> bool {
77        *self == VECTOR_END_INTEGER
78    }
79}
80
/// A trait to allow for seamless use of bytes or integer identifiers for filters
pub trait FilterId {
    /// Resolve this identifier to the numeric [`Id`] used by `header`.
    ///
    /// The byte-string implementation looks the name up in the header and can therefore
    /// fail; the [`Id`] implementation returns itself unchanged.
    fn id_from_header(&self, header: &HeaderView) -> Result<Id>;

    /// Return true if this identifier denotes the passing state: `PASS` or `.` for byte
    /// strings, `Id(0)` for numeric identifiers.
    fn is_pass(&self) -> bool;
}
86
87impl FilterId for [u8] {
88    fn id_from_header(&self, header: &HeaderView) -> Result<Id> {
89        header.name_to_id(self)
90    }
91    fn is_pass(&self) -> bool {
92        matches!(self, b"PASS" | b".")
93    }
94}
95
96impl FilterId for Id {
97    fn id_from_header(&self, _header: &HeaderView) -> Result<Id> {
98        Ok(*self)
99    }
100    fn is_pass(&self) -> bool {
101        *self == Id(0)
102    }
103}
104
105/// A buffer for info or format data.
106#[derive(Debug)]
107pub struct Buffer {
108    inner: *mut ::std::os::raw::c_void,
109    len: i32,
110}
111
112impl Buffer {
113    pub fn new() -> Self {
114        Buffer {
115            inner: ptr::null_mut(),
116            len: 0,
117        }
118    }
119}
120
121impl Default for Buffer {
122    fn default() -> Self {
123        Self::new()
124    }
125}
126
127impl Drop for Buffer {
128    fn drop(&mut self) {
129        unsafe {
130            ::libc::free(self.inner);
131        }
132    }
133}
134
135#[derive(new, Debug)]
136pub struct BufferBacked<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> {
137    value: T,
138    _buffer: B,
139    #[new(default)]
140    phantom: PhantomData<&'a B>,
141}
142
143impl<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> Deref for BufferBacked<'a, T, B> {
144    type Target = T;
145
146    fn deref(&self) -> &T {
147        &self.value
148    }
149}
150
151impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow<Buffer> + 'a> fmt::Display
152    for BufferBacked<'a, T, B>
153{
154    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155        fmt::Display::fmt(&self.value, f)
156    }
157}
158
159/// A VCF/BCF record.
160/// New records can be created by the `empty_record` methods of [`bcf::Reader`](crate::bcf::Reader)
161/// and [`bcf::Writer`](crate::bcf::Writer).
162/// # Example
163/// ```rust
164/// use rust_htslib::bcf::{Format, Writer};
165/// use rust_htslib::bcf::header::Header;
166///
167/// // Create minimal VCF header with a single sample
168/// let mut header = Header::new();
169/// header.push_sample("sample".as_bytes());
170///
171/// // Write uncompressed VCF to stdout with above header and get an empty record
172/// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
173/// let mut record = vcf.empty_record();
174/// ```
#[derive(Debug)]
pub struct Record {
    /// Raw pointer to the underlying htslib record; `Record::new` obtains it from
    /// `bcf_init`.
    pub inner: *mut htslib::bcf1_t,
    /// Shared handle to the header associated with this record.
    header: Rc<HeaderView>,
}
180
181impl Record {
182    /// Construct record with reference to header `HeaderView`, for create-internal use.
183    pub(crate) fn new(header: Rc<HeaderView>) -> Self {
184        let inner = unsafe {
185            let inner = htslib::bcf_init();
186            // Always unpack record.
187            htslib::bcf_unpack(inner, htslib::BCF_UN_ALL as i32);
188            inner
189        };
190        Record { inner, header }
191    }
192
193    /// Force unpacking of internal record values.
194    pub fn unpack(&mut self) {
195        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
196    }
197
198    /// Return associated header.
199    pub fn header(&self) -> &HeaderView {
200        self.header.as_ref()
201    }
202
203    /// Set the record header.
204    pub(crate) fn set_header(&mut self, header: Rc<HeaderView>) {
205        self.header = header;
206    }
207
208    /// Return reference to the inner C struct.
209    ///
210    /// # Remarks
211    ///
212    /// Note that this function is only required as long as Rust-Htslib does not provide full
213    /// access to all aspects of Htslib.
214    pub fn inner(&self) -> &htslib::bcf1_t {
215        unsafe { &*self.inner }
216    }
217
218    /// Return mutable reference to inner C struct.
219    ///
220    /// # Remarks
221    ///
222    /// Note that this function is only required as long as Rust-Htslib does not provide full
223    /// access to all aspects of Htslib.
224    pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
225        unsafe { &mut *self.inner }
226    }
227
228    /// Get the reference id of the record.
229    ///
230    /// To look up the contig name,
231    /// use [`HeaderView::rid2name`](../header/struct.HeaderView.html#method.rid2name).
232    ///
233    /// # Returns
234    ///
235    /// - `Some(rid)` if the internal `rid` is set to a value that is not `-1`
236    /// - `None` if the internal `rid` is set to `-1`
237    pub fn rid(&self) -> Option<u32> {
238        match self.inner().rid {
239            -1 => None,
240            rid => Some(rid as u32),
241        }
242    }
243
244    /// Update the reference id of the record.
245    ///
246    /// To look up reference id for a contig name,
247    /// use [`HeaderView::name2rid`](../header/struct.HeaderView.html#method.name2rid).
248    ///
249    /// # Example
250    ///
251    /// Example assumes we have a Record `record` from a VCF with a header containing region
252    /// named `1`. See [module documentation](../index.html#example-writing) for how to set
253    /// up VCF, header, and record.
254    ///
255    /// ```
256    /// # use rust_htslib::bcf::{Format, Writer};
257    /// # use rust_htslib::bcf::header::Header;
258    /// # let mut header = Header::new();
259    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
260    /// # header.push_record(header_contig_line.as_bytes());
261    /// # header.push_sample("test_sample".as_bytes());
262    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
263    /// # let mut record = vcf.empty_record();
264    /// let rid = record.header().name2rid(b"1").ok();
265    /// record.set_rid(rid);
266    /// assert_eq!(record.rid(), rid);
267    /// let name = record.header().rid2name(record.rid().unwrap()).ok();
268    /// assert_eq!(Some("1".as_bytes()), name);
269    /// ```
270    pub fn set_rid(&mut self, rid: Option<u32>) {
271        match rid {
272            Some(rid) => self.inner_mut().rid = rid as i32,
273            None => self.inner_mut().rid = -1,
274        }
275    }
276
277    /// Return **0-based** position
278    pub fn pos(&self) -> i64 {
279        self.inner().pos
280    }
281
282    /// Set **0-based** position
283    pub fn set_pos(&mut self, pos: i64) {
284        self.inner_mut().pos = pos;
285    }
286
287    /// Return the **0-based, exclusive** end position
288    ///
289    /// # Example
290    /// ```rust
291    /// # use rust_htslib::bcf::{Format, Header, Writer};
292    /// # use tempfile::NamedTempFile;
293    /// # let tmp = NamedTempFile::new().unwrap();
294    /// # let path = tmp.path();
295    /// # let header = Header::new();
296    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
297    /// # let mut record = vcf.empty_record();
298    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
299    /// record.set_alleles(alleles).expect("Failed to set alleles");
300    /// record.set_pos(5);
301    ///
302    /// assert_eq!(record.end(), 8)
303    /// ```
304    pub fn end(&self) -> i64 {
305        self.pos() + self.rlen()
306    }
307
308    /// Return the value of the ID column.
309    ///
310    /// When empty, returns `b".".to_vec()`.
311    pub fn id(&self) -> Vec<u8> {
312        if self.inner().d.id.is_null() {
313            b".".to_vec()
314        } else {
315            let id = unsafe { ffi::CStr::from_ptr(self.inner().d.id) };
316            id.to_bytes().to_vec()
317        }
318    }
319
320    /// Update the ID string to the given value.
321    pub fn set_id(&mut self, id: &[u8]) -> Result<()> {
322        let c_str = ffi::CString::new(id).unwrap();
323        if unsafe {
324            htslib::bcf_update_id(
325                self.header().inner,
326                self.inner,
327                c_str.as_ptr() as *mut c_char,
328            )
329        } == 0
330        {
331            Ok(())
332        } else {
333            Err(Error::BcfSetValues)
334        }
335    }
336
337    /// Clear the ID column (set it to `"."`).
338    pub fn clear_id(&mut self) -> Result<()> {
339        let c_str = ffi::CString::new(&b"."[..]).unwrap();
340        if unsafe {
341            htslib::bcf_update_id(
342                self.header().inner,
343                self.inner,
344                c_str.as_ptr() as *mut c_char,
345            )
346        } == 0
347        {
348            Ok(())
349        } else {
350            Err(Error::BcfSetValues)
351        }
352    }
353
354    /// Add the ID string (the ID field is semicolon-separated), checking for duplicates.
355    pub fn push_id(&mut self, id: &[u8]) -> Result<()> {
356        let c_str = ffi::CString::new(id).unwrap();
357        if unsafe {
358            htslib::bcf_add_id(
359                self.header().inner,
360                self.inner,
361                c_str.as_ptr() as *mut c_char,
362            )
363        } == 0
364        {
365            Ok(())
366        } else {
367            Err(Error::BcfSetValues)
368        }
369    }
370
371    /// Return `Filters` iterator for enumerating all filters that have been set.
372    ///
373    /// A record having the `PASS` filter will return an empty `Filter` here.
374    pub fn filters(&self) -> Filters<'_> {
375        Filters::new(self)
376    }
377
378    /// Query whether the filter with the given ID has been set.
379    ///
380    /// This method can be used to check if a record passes filtering by using either `Id(0)`,
381    /// `PASS` or `.`
382    ///
383    /// # Example
384    /// ```rust
385    /// # use rust_htslib::bcf::{Format, Header, Writer};
386    /// # use rust_htslib::bcf::header::Id;
387    /// # use tempfile::NamedTempFile;
388    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
389    /// # let path = tmp.path();
390    /// let mut header = Header::new();
391    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
392    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
393    /// # let mut record = vcf.empty_record();
394    /// assert!(record.has_filter("PASS".as_bytes()));
395    /// assert!(record.has_filter(".".as_bytes()));
396    /// assert!(record.has_filter(&Id(0)));
397    ///
398    /// record.push_filter("foo".as_bytes()).unwrap();
399    /// assert!(record.has_filter("foo".as_bytes()));
400    /// assert!(!record.has_filter("PASS".as_bytes()))
401    /// ```
402    pub fn has_filter<T: FilterId + ?Sized>(&self, flt_id: &T) -> bool {
403        if flt_id.is_pass() && self.inner().d.n_flt == 0 {
404            return true;
405        }
406        let id = match flt_id.id_from_header(self.header()) {
407            Ok(i) => *i,
408            Err(_) => return false,
409        };
410        for i in 0..(self.inner().d.n_flt as isize) {
411            if unsafe { *self.inner().d.flt.offset(i) } == id as i32 {
412                return true;
413            }
414        }
415        false
416    }
417
418    /// Set the given filter IDs to the FILTER column.
419    ///
420    /// Setting an empty slice removes all filters and sets `PASS`.
421    ///
422    /// # Example
423    /// ```rust
424    /// # use rust_htslib::bcf::{Format, Header, Writer};
425    /// # use rust_htslib::bcf::header::Id;
426    /// # use tempfile::NamedTempFile;
427    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
428    /// # let path = tmp.path();
429    /// let mut header = Header::new();
430    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
431    /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
432    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
433    /// # let mut record = vcf.empty_record();
434    /// let foo = record.header().name_to_id(b"foo").unwrap();
435    /// let bar = record.header().name_to_id(b"bar").unwrap();
436    /// assert!(record.has_filter("PASS".as_bytes()));
437    /// let mut filters = vec![&foo, &bar];
438    /// record.set_filters(&filters).unwrap();
439    /// assert!(record.has_filter(&foo));
440    /// assert!(record.has_filter(&bar));
441    /// assert!(!record.has_filter("PASS".as_bytes()));
442    /// filters.clear();
443    /// record.set_filters(&filters).unwrap();
444    /// assert!(record.has_filter("PASS".as_bytes()));
445    /// assert!(!record.has_filter("foo".as_bytes()));
446    /// // 'baz' isn't in the header
447    /// assert!(record.set_filters(&["baz".as_bytes()]).is_err())
448    /// ```
449    ///
450    /// # Errors
451    /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
452    ///
453    pub fn set_filters<T: FilterId + ?Sized>(&mut self, flt_ids: &[&T]) -> Result<()> {
454        let mut ids: Vec<i32> = flt_ids
455            .iter()
456            .map(|id| id.id_from_header(self.header()).map(|id| *id as i32))
457            .collect::<Result<Vec<i32>>>()?;
458        unsafe {
459            htslib::bcf_update_filter(
460                self.header().inner,
461                self.inner,
462                ids.as_mut_ptr(),
463                ids.len() as i32,
464            );
465        };
466        Ok(())
467    }
468
469    /// Add the given filter to the FILTER column.
470    ///
471    /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
472    /// any existing `PASS` filter is removed.
473    ///
474    /// # Example
475    /// ```rust
476    /// # use rust_htslib::bcf::{Format, Header, Writer};
477    /// # use tempfile::NamedTempFile;
478    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
479    /// # let path = tmp.path();
480    /// let mut header = Header::new();
481    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
482    /// header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
483    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
484    /// # let mut record = vcf.empty_record();
485    /// let foo = "foo".as_bytes();
486    /// let bar = record.header().name_to_id(b"bar").unwrap();
487    /// assert!(record.has_filter("PASS".as_bytes()));
488    ///
489    /// record.push_filter(foo).unwrap();
490    /// record.push_filter(&bar).unwrap();
491    /// assert!(record.has_filter(foo));
492    /// assert!(record.has_filter(&bar));
493    /// // filter must exist in the header
494    /// assert!(record.push_filter("baz".as_bytes()).is_err())
495    /// ```
496    ///
497    /// # Errors
498    /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
499    ///
500    pub fn push_filter<T: FilterId + ?Sized>(&mut self, flt_id: &T) -> Result<()> {
501        let id = flt_id.id_from_header(self.header())?;
502        unsafe {
503            htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
504        };
505        Ok(())
506    }
507
508    /// Remove the given filter from the FILTER column.
509    ///
510    /// # Arguments
511    ///
512    /// - `flt_id` - The corresponding filter ID to remove.
513    /// - `pass_on_empty` - Set to `PASS` when removing the last filter.
514    ///
515    /// # Example
516    /// ```rust
517    /// # use rust_htslib::bcf::{Format, Header, Writer};
518    /// # use tempfile::NamedTempFile;
519    /// # let tmp = tempfile::NamedTempFile::new().unwrap();
520    /// # let path = tmp.path();
521    /// let mut header = Header::new();
522    /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
523    /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
524    /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
525    /// # let mut record = vcf.empty_record();
526    /// let foo = "foo".as_bytes();
527    /// let bar = "bar".as_bytes();
528    /// record.set_filters(&[foo, bar]).unwrap();
529    /// assert!(record.has_filter(foo));
530    /// assert!(record.has_filter(bar));
531    ///
532    /// record.remove_filter(foo, true).unwrap();
533    /// assert!(!record.has_filter(foo));
534    /// assert!(record.has_filter(bar));
535    /// // 'baz' is not in the header
536    /// assert!(record.remove_filter("baz".as_bytes(), true).is_err());
537    ///
538    /// record.remove_filter(bar, true).unwrap();
539    /// assert!(!record.has_filter(bar));
540    /// assert!(record.has_filter("PASS".as_bytes()));
541    /// ```
542    ///
543    /// # Errors
544    /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
545    ///
546    pub fn remove_filter<T: FilterId + ?Sized>(
547        &mut self,
548        flt_id: &T,
549        pass_on_empty: bool,
550    ) -> Result<()> {
551        let id = flt_id.id_from_header(self.header())?;
552        unsafe {
553            htslib::bcf_remove_filter(
554                self.header().inner,
555                self.inner,
556                *id as i32,
557                pass_on_empty as i32,
558            )
559        };
560        Ok(())
561    }
562
563    /// Get alleles strings.
564    ///
565    /// The first allele is the reference allele.
566    pub fn alleles(&self) -> Vec<&[u8]> {
567        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
568        let n = self.inner().n_allele() as usize;
569        let dec = self.inner().d;
570        let alleles = unsafe { slice::from_raw_parts(dec.allele, n) };
571        (0..n)
572            .map(|i| unsafe { ffi::CStr::from_ptr(alleles[i]).to_bytes() })
573            .collect()
574    }
575
576    /// Set alleles. The first allele is the reference allele.
577    ///
578    /// # Example
579    /// ```rust
580    /// # use rust_htslib::bcf::{Format, Writer};
581    /// # use rust_htslib::bcf::header::Header;
582    /// #
583    /// # // Create minimal VCF header with a single sample
584    /// # let mut header = Header::new();
585    /// # header.push_sample("sample".as_bytes());
586    /// #
587    /// # // Write uncompressed VCF to stdout with above header and get an empty record
588    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
589    /// # let mut record = vcf.empty_record();
590    /// assert_eq!(record.allele_count(), 0);
591    ///
592    /// let alleles: &[&[u8]] = &[b"A", b"TG"];
593    /// record.set_alleles(alleles).expect("Failed to set alleles");
594    /// assert_eq!(record.allele_count(), 2)
595    /// ```
596    pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<()> {
597        let cstrings: Vec<ffi::CString> = alleles
598            .iter()
599            .map(|vec| ffi::CString::new(*vec).unwrap())
600            .collect();
601        let mut ptrs: Vec<*const c_char> = cstrings
602            .iter()
603            .map(|cstr| cstr.as_ptr() as *const c_char)
604            .collect();
605        if unsafe {
606            htslib::bcf_update_alleles(
607                self.header().inner,
608                self.inner,
609                ptrs.as_mut_ptr(),
610                alleles.len() as i32,
611            )
612        } == 0
613        {
614            Ok(())
615        } else {
616            Err(Error::BcfSetValues)
617        }
618    }
619
620    /// Get variant quality.
621    pub fn qual(&self) -> f32 {
622        self.inner().qual
623    }
624
625    /// Set variant quality.
626    pub fn set_qual(&mut self, qual: f32) {
627        self.inner_mut().qual = qual;
628    }
629
630    pub fn info<'a>(&'a self, tag: &'a [u8]) -> Info<'a, Buffer> {
631        self.info_shared_buffer(tag, Buffer::new())
632    }
633
634    /// Get the value of the given info tag.
635    pub fn info_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
636        &'a self,
637        tag: &'a [u8],
638        buffer: B,
639    ) -> Info<'a, B> {
640        Info {
641            record: self,
642            tag,
643            buffer,
644        }
645    }
646
647    /// Get the number of samples in the record.
648    pub fn sample_count(&self) -> u32 {
649        self.inner().n_sample()
650    }
651
652    /// Get the number of alleles, including reference allele.
653    pub fn allele_count(&self) -> u32 {
654        self.inner().n_allele()
655    }
656
657    /// Add/replace genotypes in FORMAT GT tag.
658    ///
659    /// # Arguments
660    ///
661    /// - `genotypes` - a flattened, two-dimensional array of GenotypeAllele,
662    ///   the first dimension contains one array for each sample.
663    ///
664    /// # Errors
665    ///
666    /// Returns error if GT tag is not present in header.
667    ///
668    /// # Example
669    ///
670    /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag.
671    /// See [module documentation](../index.html#example-writing) for how to set up
672    /// VCF, header, and record.
673    ///
674    /// ```
675    /// # use rust_htslib::bcf::{Format, Writer};
676    /// # use rust_htslib::bcf::header::Header;
677    /// # use rust_htslib::bcf::record::GenotypeAllele;
678    /// # let mut header = Header::new();
679    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
680    /// # header.push_record(header_contig_line.as_bytes());
681    /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
682    /// # header.push_record(header_gt_line.as_bytes());
683    /// # header.push_sample("test_sample".as_bytes());
684    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
685    /// # let mut record = vcf.empty_record();
686    /// let alleles = &[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)];
687    /// record.push_genotypes(alleles);
688    /// assert_eq!("1/1", &format!("{}", record.genotypes().unwrap().get(0)));
689    /// ```
690    pub fn push_genotypes(&mut self, genotypes: &[GenotypeAllele]) -> Result<()> {
691        let encoded: Vec<i32> = genotypes.iter().map(|gt| i32::from(*gt)).collect();
692        self.push_format_integer(b"GT", &encoded)
693    }
694
695    /// Add/replace genotypes in FORMAT GT tag by providing a list of genotypes.
696    ///
697    /// # Arguments
698    ///
699    /// - `genotypes` - a two-dimensional array of GenotypeAllele
700    /// - `max_ploidy` - the maximum number of alleles allowed for any genotype on any sample.
701    ///
702    /// # Errors
703    ///
    /// Returns an error if any genotype has more alleles than `max_ploidy` or if the GT tag is not present in the header.
705    ///
706    /// # Example
707    ///
708    /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag and three samples.
709    /// See [module documentation](../index.html#example-writing) for how to set up
710    /// VCF, header, and record.
711    ///
712    /// ```
713    /// # use rust_htslib::bcf::{Format, Writer};
714    /// # use rust_htslib::bcf::header::Header;
715    /// # use rust_htslib::bcf::record::GenotypeAllele;
716    /// # use std::iter;
717    /// # let mut header = Header::new();
718    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
719    /// # header.push_record(header_contig_line.as_bytes());
720    /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
721    /// # header.push_record(header_gt_line.as_bytes());
722    /// # header.push_sample("first_sample".as_bytes());
723    /// # header.push_sample("second_sample".as_bytes());
724    /// # header.push_sample("third_sample".as_bytes());
725    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf)?;
726    /// # let mut record = vcf.empty_record();
727    /// let alleles = vec![
728    ///     vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
729    ///     vec![GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)],
730    ///     vec![GenotypeAllele::Unphased(0)],
731    /// ];
732    /// record.push_genotype_structured(&alleles, 2);
733    /// let gts = record.genotypes()?;
734    /// assert_eq!("1/1", &format!("{}", gts.get(0)));
735    /// assert_eq!("0|1", &format!("{}", gts.get(1)));
736    /// assert_eq!("0", &format!("{}", gts.get(2)));
737    /// # Ok::<(), rust_htslib::errors::Error>(())
738    /// ```
739    pub fn push_genotype_structured<GT>(
740        &mut self,
741        genotypes: &[GT],
742        max_ploidy: usize,
743    ) -> Result<()>
744    where
745        GT: AsRef<[GenotypeAllele]>,
746    {
747        let mut data = Vec::with_capacity(max_ploidy * genotypes.len());
748        for gt in genotypes {
749            if gt.as_ref().len() > max_ploidy {
750                return Err(Error::BcfSetValues);
751            }
752            data.extend(
753                gt.as_ref()
754                    .iter()
755                    .map(|gta| i32::from(*gta))
756                    .chain(iter::repeat_n(
757                        VECTOR_END_INTEGER,
758                        max_ploidy - gt.as_ref().len(),
759                    )),
760            );
761        }
762        self.push_format_integer(b"GT", &data)
763    }
764
765    /// Get genotypes as vector of one `Genotype` per sample.
766    ///
767    /// # Example
768    /// Parsing genotype field (`GT` tag) from a VCF record:
769    /// ```
770    /// use crate::rust_htslib::bcf::{Reader, Read};
771    /// let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
772    /// let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
773    /// for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
774    ///     let mut rec = rec.expect("Error reading record.");
775    ///     let genotypes = rec.genotypes().expect("Error reading genotypes");
776    ///     assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
777    /// }
778    /// ```
779    pub fn genotypes(&self) -> Result<Genotypes<'_, Buffer>> {
780        self.genotypes_shared_buffer(Buffer::new())
781    }
782
783    /// Get genotypes as vector of one `Genotype` per sample, using a given shared buffer
784    /// to avoid unnecessary allocations.
785    pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result<Genotypes<'a, B>>
786    where
787        B: BorrowMut<Buffer> + Borrow<Buffer> + 'a,
788    {
789        Ok(Genotypes {
790            encoded: self.format_shared_buffer(b"GT", buffer).integer()?,
791        })
792    }
793
794    /// Retrieve data for a `FORMAT` field
795    ///
796    /// # Example
797    /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
798    /// for an example of the setup used here.*
799    ///
800    /// ```rust
801    /// # use rust_htslib::bcf::{Format, Writer};
802    /// # use rust_htslib::bcf::header::Header;
803    /// #
804    /// # // Create minimal VCF header with a single sample
805    /// # let mut header = Header::new();
806    /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
807    /// #
808    /// # // Write uncompressed VCF to stdout with above header and get an empty record
809    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
810    /// # let mut record = vcf.empty_record();
811    /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
812    ///
813    /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
814    /// let sample1_depth = read_depths[0];
815    /// assert_eq!(sample1_depth, &[20]);
816    /// let sample2_depth = read_depths[1];
817    /// assert_eq!(sample2_depth, &[12])
818    /// ```
819    ///
820    /// # Errors
821    /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
822    /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
    /// accessed. If parts of the data are accessed after the `BufferBacked` object has been
824    /// dropped, you will access unallocated memory.
825    pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
826        self.format_shared_buffer(tag, Buffer::new())
827    }
828
829    /// Get the value of the given format tag for each sample.
830    pub fn format_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
831        &'a self,
832        tag: &'a [u8],
833        buffer: B,
834    ) -> Format<'a, B> {
835        Format::new(self, tag, buffer)
836    }
837
838    /// Add/replace an integer-typed FORMAT tag.
839    ///
840    /// # Arguments
841    ///
842    /// - `tag` - The tag's string.
843    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
844    ///   for each sample.
845    ///
846    /// # Errors
847    ///
848    /// Returns error if tag is not present in header.
849    pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
850        self.push_format(tag, data, htslib::BCF_HT_INT)
851    }
852
853    /// Add/replace a float-typed FORMAT tag.
854    ///
855    /// # Arguments
856    ///
857    /// - `tag` - The tag's string.
858    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
859    ///   for each sample.
860    ///
861    /// # Errors
862    ///
863    /// Returns error if tag is not present in header.
864    ///
865    /// # Example
866    ///
867    /// Example assumes we have a Record `record` from a VCF with an `AF` `FORMAT` tag.
868    /// See [module documentation](../index.html#example-writing) for how to set up
869    /// VCF, header, and record.
870    ///
871    /// ```
872    /// # use rust_htslib::bcf::{Format, Writer};
873    /// # use rust_htslib::bcf::header::Header;
874    /// # use rust_htslib::bcf::record::GenotypeAllele;
875    /// # let mut header = Header::new();
876    /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
877    /// # header.push_record(header_contig_line.as_bytes());
878    /// # let header_af_line = r#"##FORMAT=<ID=AF,Number=1,Type=Float,Description="Frequency">"#;
879    /// # header.push_record(header_af_line.as_bytes());
880    /// # header.push_sample("test_sample".as_bytes());
881    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
882    /// # let mut record = vcf.empty_record();
883    /// record.push_format_float(b"AF", &[0.5]);
884    /// assert_eq!(0.5, record.format(b"AF").float().unwrap()[0][0]);
885    /// ```
886    pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
887        self.push_format(tag, data, htslib::BCF_HT_REAL)
888    }
889
890    /// Add/replace a single-char-typed FORMAT tag.
891    ///
892    /// # Arguments
893    ///
894    /// - `tag` - The tag's string.
895    /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
896    ///   for each sample.
897    ///
898    /// # Errors
899    ///
900    /// Returns error if tag is not present in header.
901    pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<()> {
902        self.push_format(tag, data, htslib::BCF_HT_STR)
903    }
904
905    /// Add a format tag. Data is a flattened two-dimensional array.
906    /// The first dimension contains one array for each sample.
907    fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
908        let tag_c_str = ffi::CString::new(tag).unwrap();
909        unsafe {
910            if htslib::bcf_update_format(
911                self.header().inner,
912                self.inner,
913                tag_c_str.as_ptr() as *mut c_char,
914                data.as_ptr() as *const ::std::os::raw::c_void,
915                data.len() as i32,
916                ht as i32,
917            ) == 0
918            {
919                Ok(())
920            } else {
921                Err(Error::BcfSetTag {
922                    tag: str::from_utf8(tag).unwrap().to_owned(),
923                })
924            }
925        }
926    }
927
928    // TODO: should we add convenience methods clear_format_*?
929
930    /// Add a string-typed FORMAT tag. Note that genotypes are treated as a special case
931    /// and cannot be added with this method. See instead [push_genotypes](#method.push_genotypes).
932    ///
933    /// # Arguments
934    ///
935    /// - `tag` - The tag's string.
936    /// - `data` - a two-dimensional array, the first dimension contains one array
937    ///   for each sample. Must be non-empty.
938    ///
939    /// # Errors
940    ///
941    /// Returns error if tag is not present in header.
942    pub fn push_format_string<D: Borrow<[u8]>>(&mut self, tag: &[u8], data: &[D]) -> Result<()> {
943        assert!(
944            !data.is_empty(),
945            "given string data must have at least 1 element"
946        );
947        let c_data = data
948            .iter()
949            .map(|s| ffi::CString::new(s.borrow()).unwrap())
950            .collect::<Vec<ffi::CString>>();
951        let c_ptrs = c_data
952            .iter()
953            .map(|s| s.as_ptr() as *mut i8)
954            .collect::<Vec<*mut i8>>();
955        let tag_c_str = ffi::CString::new(tag).unwrap();
956        unsafe {
957            if htslib::bcf_update_format_string(
958                self.header().inner,
959                self.inner,
960                tag_c_str.as_ptr() as *mut c_char,
961                c_ptrs.as_slice().as_ptr() as *mut *const c_char,
962                data.len() as i32,
963            ) == 0
964            {
965                Ok(())
966            } else {
967                Err(Error::BcfSetTag {
968                    tag: str::from_utf8(tag).unwrap().to_owned(),
969                })
970            }
971        }
972    }
973
974    /// Add/replace an integer-typed INFO entry.
975    pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
976        self.push_info(tag, data, htslib::BCF_HT_INT)
977    }
978
979    /// Remove the integer-typed INFO entry.
980    pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<()> {
981        self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
982    }
983
984    /// Add/replace a float-typed INFO entry.
985    pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
986        self.push_info(tag, data, htslib::BCF_HT_REAL)
987    }
988
989    /// Remove the float-typed INFO entry.
990    pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<()> {
991        self.push_info::<u8>(tag, &[], htslib::BCF_HT_REAL)
992    }
993
994    /// Add/replace an INFO tag.
995    ///
996    /// # Arguments
997    /// * `tag` - the tag to add/replace
998    /// * `data` - the data to set
999    /// * `ht` - the HTSLib type to use
1000    fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
1001        let tag_c_str = ffi::CString::new(tag).unwrap();
1002        unsafe {
1003            if htslib::bcf_update_info(
1004                self.header().inner,
1005                self.inner,
1006                tag_c_str.as_ptr() as *mut c_char,
1007                data.as_ptr() as *const ::std::os::raw::c_void,
1008                data.len() as i32,
1009                ht as i32,
1010            ) == 0
1011            {
1012                Ok(())
1013            } else {
1014                Err(Error::BcfSetTag {
1015                    tag: str::from_utf8(tag).unwrap().to_owned(),
1016                })
1017            }
1018        }
1019    }
1020
1021    /// Set flag into the INFO column.
1022    pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1023        self.push_info_string_impl(tag, &[b""], htslib::BCF_HT_FLAG)
1024    }
1025
1026    /// Remove the flag from the INFO column.
1027    pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1028        self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
1029    }
1030
1031    /// Add/replace a string-typed INFO entry.
1032    pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<()> {
1033        self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
1034    }
1035
1036    /// Remove the string field from the INFO column.
1037    pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<()> {
1038        self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
1039    }
1040
1041    /// Add an string-valued INFO tag.
1042    fn push_info_string_impl(&mut self, tag: &[u8], data: &[&[u8]], ht: u32) -> Result<()> {
1043        let mut buf: Vec<u8> = Vec::new();
1044        for (i, &s) in data.iter().enumerate() {
1045            if i > 0 {
1046                buf.extend(b",");
1047            }
1048            buf.extend(s);
1049        }
1050        let c_str = ffi::CString::new(buf).unwrap();
1051        let len = if ht == htslib::BCF_HT_FLAG {
1052            data.len()
1053        } else {
1054            c_str.to_bytes().len()
1055        };
1056        let tag_c_str = ffi::CString::new(tag).unwrap();
1057        unsafe {
1058            if htslib::bcf_update_info(
1059                self.header().inner,
1060                self.inner,
1061                tag_c_str.as_ptr() as *mut c_char,
1062                c_str.as_ptr() as *const ::std::os::raw::c_void,
1063                len as i32,
1064                ht as i32,
1065            ) == 0
1066            {
1067                Ok(())
1068            } else {
1069                Err(Error::BcfSetTag {
1070                    tag: str::from_utf8(tag).unwrap().to_owned(),
1071                })
1072            }
1073        }
1074    }
1075
1076    /// Remove unused alleles.
1077    pub fn trim_alleles(&mut self) -> Result<()> {
1078        match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
1079            -1 => Err(Error::BcfRemoveAlleles),
1080            _ => Ok(()),
1081        }
1082    }
1083
1084    pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> {
1085        let rm_set = unsafe { htslib::kbs_init(remove.len()) };
1086
1087        for (i, &r) in remove.iter().enumerate() {
1088            if r {
1089                unsafe {
1090                    htslib::kbs_insert(rm_set, i as i32);
1091                }
1092            }
1093        }
1094
1095        let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
1096
1097        unsafe {
1098            htslib::kbs_destroy(rm_set);
1099        }
1100
1101        match ret {
1102            -1 => Err(Error::BcfRemoveAlleles),
1103            _ => Ok(()),
1104        }
1105    }
1106
1107    /// Get the length of the reference allele. If the record has no reference allele, then the
1108    /// result will be `0`.
1109    ///
1110    /// # Example
1111    /// ```rust
1112    /// # use rust_htslib::bcf::{Format, Writer};
1113    /// # use rust_htslib::bcf::header::Header;
1114    /// #
1115    /// # // Create minimal VCF header with a single sample
1116    /// # let mut header = Header::new();
1117    /// # header.push_sample("sample".as_bytes());
1118    /// #
1119    /// # // Write uncompressed VCF to stdout with above header and get an empty record
1120    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1121    /// # let mut record = vcf.empty_record();
1122    /// # assert_eq!(record.rlen(), 0);
1123    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1124    /// record.set_alleles(alleles).expect("Failed to set alleles");
1125    /// assert_eq!(record.rlen(), 3)
1126    /// ```
1127    pub fn rlen(&self) -> i64 {
1128        self.inner().rlen
1129    }
1130
1131    /// Clear all parts of the record. Useful if you plan to reuse a record object multiple times.
1132    ///
1133    /// # Example
1134    /// ```rust
1135    /// # use rust_htslib::bcf::{Format, Writer};
1136    /// # use rust_htslib::bcf::header::Header;
1137    /// #
1138    /// # // Create minimal VCF header with a single sample
1139    /// # let mut header = Header::new();
1140    /// # header.push_sample("sample".as_bytes());
1141    /// #
1142    /// # // Write uncompressed VCF to stdout with above header and get an empty record
1143    /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1144    /// # let mut record = vcf.empty_record();
1145    /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1146    /// record.set_alleles(alleles).expect("Failed to set alleles");
1147    /// record.set_pos(6);
1148    /// record.clear();
1149    /// assert_eq!(record.rlen(), 0);
1150    /// assert_eq!(record.pos(), 0)
1151    /// ```
1152    pub fn clear(&self) {
1153        unsafe { htslib::bcf_clear(self.inner) }
1154    }
1155
1156    /// Provide short description of record for locating it in the BCF/VCF file.
1157    pub fn desc(&self) -> String {
1158        if let Some(rid) = self.rid() {
1159            if let Ok(contig) = self.header.rid2name(rid) {
1160                return format!("{}:{}", str::from_utf8(contig).unwrap(), self.pos());
1161            }
1162        }
1163        "".to_owned()
1164    }
1165
1166    /// Convert to VCF String
1167    ///
1168    /// Intended for debug only. Use Writer for efficient VCF output.
1169    ///
1170    pub fn to_vcf_string(&self) -> Result<String> {
1171        let mut buf = htslib::kstring_t {
1172            l: 0,
1173            m: 0,
1174            s: ptr::null_mut(),
1175        };
1176        let ret = unsafe { htslib::vcf_format(self.header().inner, self.inner, &mut buf) };
1177
1178        if ret < 0 {
1179            if !buf.s.is_null() {
1180                unsafe {
1181                    libc::free(buf.s as *mut libc::c_void);
1182                }
1183            }
1184            return Err(Error::BcfToString);
1185        }
1186
1187        let vcf_str = unsafe {
1188            let vcf_str = String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap());
1189            if !buf.s.is_null() {
1190                libc::free(buf.s as *mut libc::c_void);
1191            }
1192            vcf_str
1193        };
1194
1195        Ok(vcf_str)
1196    }
1197}
1198
1199impl Clone for Record {
1200    fn clone(&self) -> Self {
1201        let inner = unsafe { htslib::bcf_dup(self.inner) };
1202        Record {
1203            inner,
1204            header: self.header.clone(),
1205        }
1206    }
1207}
1208
1209impl genome::AbstractLocus for Record {
1210    fn contig(&self) -> &str {
1211        str::from_utf8(
1212            self.header()
1213                .rid2name(self.rid().expect("rid not set"))
1214                .expect("unable to find rid in header"),
1215        )
1216        .expect("unable to interpret contig name as UTF-8")
1217    }
1218
1219    fn pos(&self) -> u64 {
1220        self.pos() as u64
1221    }
1222}
1223
/// Phased or unphased alleles, represented as indices.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
    Unphased(i32),
    Phased(i32),
    UnphasedMissing,
    PhasedMissing,
}

impl GenotypeAllele {
    /// Decode given integer according to BCF standard.
    #[deprecated(
        since = "0.36.0",
        note = "Please use the conversion trait From<i32> for GenotypeAllele instead."
    )]
    pub fn from_encoded(encoded: i32) -> Self {
        // Single source of truth for the decoding: the `From<i32>` implementation.
        GenotypeAllele::from(encoded)
    }

    /// Get the index into the list of alleles.
    ///
    /// Returns `None` for the missing variants.
    pub fn index(self) -> Option<u32> {
        match self {
            GenotypeAllele::Unphased(i) | GenotypeAllele::Phased(i) => Some(i as u32),
            GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
        }
    }
}

impl fmt::Display for GenotypeAllele {
    /// Write the allele index, or `.` for a missing allele.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.index() {
            Some(a) => write!(f, "{}", a),
            None => write!(f, "."),
        }
    }
}

impl From<GenotypeAllele> for i32 {
    /// Encode as `((allele + 1) << 1) | phased`, with missing alleles treated as index `-1`.
    fn from(allele: GenotypeAllele) -> i32 {
        let (allele, phased) = match allele {
            GenotypeAllele::UnphasedMissing => (-1, 0),
            GenotypeAllele::PhasedMissing => (-1, 1),
            GenotypeAllele::Unphased(a) => (a, 0),
            GenotypeAllele::Phased(a) => (a, 1),
        };
        ((allele + 1) << 1) | phased
    }
}

impl From<i32> for GenotypeAllele {
    /// Decode an integer: the lowest bit is the phasing flag, the remaining bits hold
    /// `allele index + 1` (so `0` and `1` denote the unphased and phased missing allele).
    fn from(encoded: i32) -> GenotypeAllele {
        // `encoded & 1` is always 0 or 1, so these four arms are exhaustive and no
        // catch-all panic is needed.
        match encoded {
            0 => GenotypeAllele::UnphasedMissing,
            1 => GenotypeAllele::PhasedMissing,
            e if e & 1 == 1 => GenotypeAllele::Phased((e >> 1) - 1),
            e => GenotypeAllele::Unphased((e >> 1) - 1),
        }
    }
}
1290
custom_derive! {
    /// Genotype representation as a vector of `GenotypeAllele`.
    ///
    /// Holds the alleles of one sample in the order they were decoded
    /// (see `Genotypes::get`).
    // NOTE(review): `NewtypeDeref` presumably derives `Deref` to the wrapped `Vec`, which
    // would make slice/`Vec` methods available on `Genotype` directly; confirm against the
    // derive's documentation.
    #[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Genotype(Vec<GenotypeAllele>);
}
1296
1297impl fmt::Display for Genotype {
1298    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1299        let Genotype(alleles) = self;
1300        write!(f, "{}", alleles[0])?;
1301        for a in &alleles[1..] {
1302            let sep = match a {
1303                GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
1304                GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
1305            };
1306            write!(f, "{}{}", sep, a)?;
1307        }
1308        Ok(())
1309    }
1310}
1311
1312/// Lazy representation of genotypes, that does no computation until a particular genotype is queried.
1313#[derive(Debug)]
1314pub struct Genotypes<'a, B>
1315where
1316    B: Borrow<Buffer> + 'a,
1317{
1318    encoded: BufferBacked<'a, Vec<&'a [i32]>, B>,
1319}
1320
1321impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1322    /// Get genotype of ith sample.
1323    ///
1324    /// Note that the result complies with the BCF spec. This means that the
1325    /// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
1326    /// this method will return `[Unphased(1), Phased(1)]`.
1327    pub fn get(&self, i: usize) -> Genotype {
1328        let igt = self.encoded[i];
1329        let allelles = igt
1330            .iter()
1331            .take_while(|&&i| i != VECTOR_END_INTEGER)
1332            .map(|&i| GenotypeAllele::from(i))
1333            .collect();
1334        Genotype(allelles)
1335    }
1336}
1337
impl Drop for Record {
    /// Release the underlying HTSlib record with `bcf_destroy`.
    fn drop(&mut self) {
        unsafe { htslib::bcf_destroy(self.inner) };
    }
}

// `Record` is passed to HTSlib through the raw pointer in `inner`, so thread-safety is
// asserted manually here rather than derived by the compiler.
unsafe impl Send for Record {}

// NOTE(review): `Record::clear` takes `&self` yet calls `bcf_clear` on `inner`; confirm
// that sharing a `Record` between threads is actually sound.
unsafe impl Sync for Record {}
1347
1348/// Info tag representation.
1349#[derive(Debug)]
1350pub struct Info<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
1351    record: &'a Record,
1352    tag: &'a [u8],
1353    buffer: B,
1354}
1355
/// Optional, buffer-backed list of byte strings; this is the success type of
/// `Info::string`, where `None` means the tag is not present in the record.
pub type BufferBackedOption<'b, B> = Option<BufferBacked<'b, Vec<&'b [u8]>, B>>;
1357
1358impl<'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Info<'_, B> {
1359    /// Short description of info tag.
1360    pub fn desc(&self) -> String {
1361        str::from_utf8(self.tag).unwrap().to_owned()
1362    }
1363
1364    fn data(&mut self, data_type: u32) -> Result<Option<i32>> {
1365        let mut n: i32 = self.buffer.borrow().len;
1366        let c_str = ffi::CString::new(self.tag).unwrap();
1367        let ret = unsafe {
1368            htslib::bcf_get_info_values(
1369                self.record.header().inner,
1370                self.record.inner,
1371                c_str.as_ptr() as *mut c_char,
1372                &mut self.buffer.borrow_mut().inner,
1373                &mut n,
1374                data_type as i32,
1375            )
1376        };
1377        self.buffer.borrow_mut().len = n;
1378
1379        match ret {
1380            -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1381            -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1382            -3 => Ok(None),
1383            ret => Ok(Some(ret)),
1384        }
1385    }
1386
1387    /// Get integers from tag. `None` if tag not present in record.
1388    ///
1389    /// Import `bcf::record::Numeric` for missing value handling.
1390    ///
1391    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1392    /// as along as the data is accessed. If parts of the data are accessed while
1393    /// the BufferBacked object is already dropped, you will access unallocated
1394    /// memory.
1395    pub fn integer(mut self) -> Result<Option<BufferBacked<'b, &'b [i32], B>>> {
1396        self.data(htslib::BCF_HT_INT).map(|data| {
1397            data.map(|ret| {
1398                let values = unsafe {
1399                    slice::from_raw_parts(self.buffer.borrow().inner as *const i32, ret as usize)
1400                };
1401                BufferBacked::new(&values[..ret as usize], self.buffer)
1402            })
1403        })
1404    }
1405
1406    /// Get floats from tag. `None` if tag not present in record.
1407    ///
1408    /// Import `bcf::record::Numeric` for missing value handling.
1409    ///
1410    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1411    /// as along as the data is accessed. If parts of the data are accessed while
1412    /// the BufferBacked object is already dropped, you will access unallocated
1413    /// memory.
1414    pub fn float(mut self) -> Result<Option<BufferBacked<'b, &'b [f32], B>>> {
1415        self.data(htslib::BCF_HT_REAL).map(|data| {
1416            data.map(|ret| {
1417                let values = unsafe {
1418                    slice::from_raw_parts(self.buffer.borrow().inner as *const f32, ret as usize)
1419                };
1420                BufferBacked::new(&values[..ret as usize], self.buffer)
1421            })
1422        })
1423    }
1424
1425    /// Get flags from tag. `false` if not set.
1426    pub fn flag(&mut self) -> Result<bool> {
1427        self.data(htslib::BCF_HT_FLAG).map(|data| match data {
1428            Some(ret) => ret == 1,
1429            None => false,
1430        })
1431    }
1432
1433    /// Get strings from tag. `None` if tag not present in record.
1434    ///
1435    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1436    /// as along as the data is accessed. If parts of the data are accessed while
1437    /// the BufferBacked object is already dropped, you will access unallocated
1438    /// memory.
1439    pub fn string(mut self) -> Result<BufferBackedOption<'b, B>> {
1440        self.data(htslib::BCF_HT_STR).map(|data| {
1441            data.map(|ret| {
1442                BufferBacked::new(
1443                    unsafe {
1444                        slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize)
1445                    }
1446                    .split(|c| *c == b',')
1447                    .map(|s| {
1448                        // stop at zero character
1449                        s.split(|c| *c == 0u8)
1450                            .next()
1451                            .expect("Bug: returned string should not be empty.")
1452                    })
1453                    .collect(),
1454                    self.buffer,
1455                )
1456            })
1457        })
1458    }
1459}
1460
// `Info` holds a `&Record`, and `Record` itself is manually marked `Send`/`Sync`; these
// impls additionally assert thread-safety for every buffer type `B`.
// NOTE(review): there is no `B: Send` / `B: Sync` bound here; confirm this is intended.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Info<'_, B> {}

unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Info<'_, B> {}
1464
1465fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
1466    s.split(|v| v.is_vector_end())
1467        .next()
1468        .expect("Bug: returned slice should not be empty.")
1469}
1470
/// Representation of per-sample data.
#[derive(Debug)]
pub struct Format<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    /// Record whose FORMAT field is accessed.
    record: &'a Record,
    /// Name of the FORMAT tag.
    tag: &'a [u8],
    /// Pointer obtained from `bcf_get_fmt` for `tag` when the value is created.
    // NOTE(review): presumably null when the record does not carry the tag; confirm
    // against the HTSlib documentation.
    inner: *mut htslib::bcf_fmt_t,
    /// Buffer passed to HTSlib to receive the decoded values.
    buffer: B,
}
1479
1480impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
1481    /// Create new format data in a given record.
1482    fn new(record: &'a Record, tag: &'a [u8], buffer: B) -> Format<'a, B> {
1483        let c_str = ffi::CString::new(tag).unwrap();
1484        let inner = unsafe {
1485            htslib::bcf_get_fmt(
1486                record.header().inner,
1487                record.inner,
1488                c_str.as_ptr() as *mut c_char,
1489            )
1490        };
1491        Format {
1492            record,
1493            tag,
1494            inner,
1495            buffer,
1496        }
1497    }
1498
1499    /// Provide short description of format entry (just the tag name).
1500    pub fn desc(&self) -> String {
1501        str::from_utf8(self.tag).unwrap().to_owned()
1502    }
1503
1504    pub fn inner(&self) -> &htslib::bcf_fmt_t {
1505        unsafe { &*self.inner }
1506    }
1507
1508    pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
1509        unsafe { &mut *self.inner }
1510    }
1511
1512    fn values_per_sample(&self) -> usize {
1513        self.inner().n as usize
1514    }
1515
1516    /// Read and decode format data into a given type.
1517    fn data(&mut self, data_type: u32) -> Result<i32> {
1518        let mut n: i32 = self.buffer.borrow().len;
1519        let c_str = ffi::CString::new(self.tag).unwrap();
1520        let ret = unsafe {
1521            htslib::bcf_get_format_values(
1522                self.record.header().inner,
1523                self.record.inner,
1524                c_str.as_ptr() as *mut c_char,
1525                &mut self.buffer.borrow_mut().inner,
1526                &mut n,
1527                data_type as i32,
1528            )
1529        };
1530        self.buffer.borrow_mut().len = n;
1531        match ret {
1532            -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1533            -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1534            -3 => Err(Error::BcfMissingTag {
1535                tag: self.desc(),
1536                record: self.record.desc(),
1537            }),
1538            ret => Ok(ret),
1539        }
1540    }
1541
1542    /// Get format data as integers.
1543    ///
1544    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1545    /// as long as the data is accessed. If parts of the data are accessed while
1546    /// the BufferBacked object is already dropped, you will access unallocated
1547    /// memory.
1548    pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
1549        self.data(htslib::BCF_HT_INT).map(|ret| {
1550            BufferBacked::new(
1551                unsafe {
1552                    slice::from_raw_parts(
1553                        self.buffer.borrow_mut().inner as *const i32,
1554                        ret as usize,
1555                    )
1556                }
1557                .chunks(self.values_per_sample())
1558                .map(trim_slice)
1559                .collect(),
1560                self.buffer,
1561            )
1562        })
1563    }
1564
1565    /// Get format data as floats.
1566    ///
1567    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1568    /// as along as the data is accessed. If parts of the data are accessed while
1569    /// the BufferBacked object is already dropped, you will access unallocated
1570    /// memory.
1571    pub fn float(mut self) -> Result<BufferBacked<'b, Vec<&'b [f32]>, B>> {
1572        self.data(htslib::BCF_HT_REAL).map(|ret| {
1573            BufferBacked::new(
1574                unsafe {
1575                    slice::from_raw_parts(
1576                        self.buffer.borrow_mut().inner as *const f32,
1577                        ret as usize,
1578                    )
1579                }
1580                .chunks(self.values_per_sample())
1581                .map(trim_slice)
1582                .collect(),
1583                self.buffer,
1584            )
1585        })
1586    }
1587
1588    /// Get format data as byte slices. To obtain the values strings, use `std::str::from_utf8`.
1589    ///
1590    /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1591    /// as along as the data is accessed. If parts of the data are accessed while
1592    /// the BufferBacked object is already dropped, you will access unallocated
1593    /// memory.
1594    pub fn string(mut self) -> Result<BufferBacked<'b, Vec<&'b [u8]>, B>> {
1595        self.data(htslib::BCF_HT_STR).map(|ret| {
1596            if ret == 0 {
1597                return BufferBacked::new(Vec::new(), self.buffer);
1598            }
1599            BufferBacked::new(
1600                unsafe {
1601                    slice::from_raw_parts(self.buffer.borrow_mut().inner as *const u8, ret as usize)
1602                }
1603                .chunks(self.values_per_sample())
1604                .map(|s| {
1605                    // stop at zero character
1606                    s.split(|c| *c == 0u8)
1607                        .next()
1608                        .expect("Bug: returned string should not be empty.")
1609                })
1610                .collect(),
1611                self.buffer,
1612            )
1613        })
1614    }
1615}
1616
// `Format` stores a raw `*mut htslib::bcf_fmt_t`, so the compiler does not derive
// `Send`/`Sync` for it; they are asserted manually for every buffer type `B`.
// NOTE(review): there is no `B: Send` / `B: Sync` bound here; confirm this is intended.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Format<'_, B> {}

unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Format<'_, B> {}
1620
/// Iterator over the filter ids stored in a record.
#[derive(Debug)]
pub struct Filters<'a> {
    /// Reference to the `Record` to enumerate filters for.
    record: &'a Record,
    /// Index of the next filter to return, if not at end.
    idx: i32,
}
1628
1629impl<'a> Filters<'a> {
1630    pub fn new(record: &'a Record) -> Self {
1631        Filters { record, idx: 0 }
1632    }
1633}
1634
1635impl Iterator for Filters<'_> {
1636    type Item = Id;
1637
1638    fn next(&mut self) -> Option<Id> {
1639        if self.record.inner().d.n_flt <= self.idx {
1640            None
1641        } else {
1642            let i = self.idx as isize;
1643            self.idx += 1;
1644            Some(Id(unsafe { *self.record.inner().d.flt.offset(i) } as u32))
1645        }
1646    }
1647}
1648
1649#[cfg(test)]
1650mod tests {
1651    use super::*;
1652    use crate::bcf::{Format, Header, Writer};
1653    use tempfile::NamedTempFile;
1654
1655    #[test]
1656    fn test_missing_float() {
1657        let expected: u32 = 0x7F80_0001;
1658        assert_eq!(MISSING_FLOAT.bits(), expected);
1659    }
1660
1661    #[test]
1662    fn test_vector_end_float() {
1663        let expected: u32 = 0x7F80_0002;
1664        assert_eq!(VECTOR_END_FLOAT.bits(), expected);
1665    }
1666
1667    #[test]
1668    fn test_record_rlen() {
1669        let tmp = NamedTempFile::new().unwrap();
1670        let path = tmp.path();
1671        let header = Header::new();
1672        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1673        let mut record = vcf.empty_record();
1674        assert_eq!(record.rlen(), 0);
1675        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1676        record.set_alleles(alleles).expect("Failed to set alleles");
1677        assert_eq!(record.rlen(), 3)
1678    }
1679
1680    #[test]
1681    fn test_record_end() {
1682        let tmp = NamedTempFile::new().unwrap();
1683        let path = tmp.path();
1684        let header = Header::new();
1685        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1686        let mut record = vcf.empty_record();
1687        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1688        record.set_alleles(alleles).expect("Failed to set alleles");
1689        record.set_pos(5);
1690
1691        assert_eq!(record.end(), 8)
1692    }
1693
1694    #[test]
1695    fn test_record_clear() {
1696        let tmp = NamedTempFile::new().unwrap();
1697        let path = tmp.path();
1698        let mut header = Header::new();
1699        header.push_sample("sample".as_bytes());
1700        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1701        let mut record = vcf.empty_record();
1702        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1703        record.set_alleles(alleles).expect("Failed to set alleles");
1704        record.set_pos(6);
1705        record.clear();
1706
1707        assert_eq!(record.rlen(), 0);
1708        assert_eq!(record.sample_count(), 0);
1709        assert_eq!(record.pos(), 0)
1710    }
1711
1712    #[test]
1713    fn test_record_clone() {
1714        let tmp = NamedTempFile::new().unwrap();
1715        let path = tmp.path();
1716        let header = Header::new();
1717        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1718        let mut record = vcf.empty_record();
1719        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1720        record.set_alleles(alleles).expect("Failed to set alleles");
1721        record.set_pos(6);
1722
1723        let mut cloned_record = record.clone();
1724        cloned_record.set_pos(5);
1725
1726        assert_eq!(record.pos(), 6);
1727        assert_eq!(record.allele_count(), 2);
1728        assert_eq!(cloned_record.pos(), 5);
1729        assert_eq!(cloned_record.allele_count(), 2);
1730    }
1731
1732    #[test]
1733    fn test_record_has_filter_pass_is_default() {
1734        let tmp = NamedTempFile::new().unwrap();
1735        let path = tmp.path();
1736        let header = Header::new();
1737        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1738        let record = vcf.empty_record();
1739
1740        assert!(record.has_filter("PASS".as_bytes()));
1741        assert!(record.has_filter(".".as_bytes()));
1742        assert!(record.has_filter(&Id(0)));
1743        assert!(!record.has_filter("foo".as_bytes()));
1744        assert!(!record.has_filter(&Id(2)));
1745    }
1746
1747    #[test]
1748    fn test_record_has_filter_custom() {
1749        let tmp = NamedTempFile::new().unwrap();
1750        let path = tmp.path();
1751        let mut header = Header::new();
1752        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1753        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1754        let mut record = vcf.empty_record();
1755        record.push_filter("foo".as_bytes()).unwrap();
1756
1757        assert!(record.has_filter("foo".as_bytes()));
1758        assert!(!record.has_filter("PASS".as_bytes()))
1759    }
1760
1761    #[test]
1762    fn test_record_push_filter() {
1763        let tmp = NamedTempFile::new().unwrap();
1764        let path = tmp.path();
1765        let mut header = Header::new();
1766        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1767        header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
1768        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1769        let mut record = vcf.empty_record();
1770        assert!(record.has_filter("PASS".as_bytes()));
1771        record.push_filter("foo".as_bytes()).unwrap();
1772        let bar = record.header().name_to_id(b"bar").unwrap();
1773        record.push_filter(&bar).unwrap();
1774        assert!(record.has_filter("foo".as_bytes()));
1775        assert!(record.has_filter(&bar));
1776        assert!(!record.has_filter("PASS".as_bytes()));
1777        assert!(record.push_filter("baz".as_bytes()).is_err())
1778    }
1779
1780    #[test]
1781    fn test_record_set_filters() {
1782        let tmp = NamedTempFile::new().unwrap();
1783        let path = tmp.path();
1784        let mut header = Header::new();
1785        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1786        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
1787        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1788        let mut record = vcf.empty_record();
1789        assert!(record.has_filter("PASS".as_bytes()));
1790        record
1791            .set_filters(&["foo".as_bytes(), "bar".as_bytes()])
1792            .unwrap();
1793        assert!(record.has_filter("foo".as_bytes()));
1794        assert!(record.has_filter("bar".as_bytes()));
1795        assert!(!record.has_filter("PASS".as_bytes()));
1796        let filters: &[&Id] = &[];
1797        record.set_filters(filters).unwrap();
1798        assert!(record.has_filter("PASS".as_bytes()));
1799        assert!(!record.has_filter("foo".as_bytes()));
1800        assert!(record
1801            .set_filters(&["foo".as_bytes(), "baz".as_bytes()])
1802            .is_err())
1803    }
1804
1805    #[test]
1806    fn test_record_remove_filter() {
1807        let tmp = NamedTempFile::new().unwrap();
1808        let path = tmp.path();
1809        let mut header = Header::new();
1810        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1811        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
1812        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1813        let mut record = vcf.empty_record();
1814        let foo = record.header().name_to_id(b"foo").unwrap();
1815        let bar = record.header().name_to_id(b"bar").unwrap();
1816        record.set_filters(&[&foo, &bar]).unwrap();
1817        assert!(record.has_filter(&foo));
1818        assert!(record.has_filter(&bar));
1819        record.remove_filter(&foo, true).unwrap();
1820        assert!(!record.has_filter(&foo));
1821        assert!(record.has_filter(&bar));
1822        assert!(record.remove_filter("baz".as_bytes(), true).is_err());
1823        record.remove_filter(&bar, true).unwrap();
1824        assert!(!record.has_filter(&bar));
1825        assert!(record.has_filter("PASS".as_bytes()));
1826    }
1827
1828    #[test]
1829    fn test_record_to_vcf_string_err() {
1830        let tmp = NamedTempFile::new().unwrap();
1831        let path = tmp.path();
1832        let header = Header::new();
1833        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1834        let record = vcf.empty_record();
1835        assert!(record.to_vcf_string().is_err());
1836    }
1837
1838    #[test]
1839    fn test_record_to_vcf_string() {
1840        let tmp = NamedTempFile::new().unwrap();
1841        let path = tmp.path();
1842        let mut header = Header::new();
1843        header.push_record(b"##contig=<ID=chr1,length=1000>");
1844        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
1845        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
1846        let mut record = vcf.empty_record();
1847        record.push_filter("foo".as_bytes()).unwrap();
1848        assert_eq!(
1849            record.to_vcf_string().unwrap(),
1850            "chr1\t1\t.\t.\t.\t0\tfoo\t.\n"
1851        );
1852    }
1853}