Skip to main content

rust_htslib/bcf/
mod.rs

1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Module for working with VCF and BCF files.
7//!
8//! # Performance Remarks
9//!
10//! Note that BCF corresponds to the in-memory representation of BCF/VCF records in Htslib
11//! itself. Thus, it comes without a runtime penalty for parsing, in contrast to reading VCF
12//! files.
13//!
14//! # Example (reading)
15//!
16//!   - Obtaining 0-based locus index of the VCF record.
17//!   - Obtaining alleles of the VCF record.
18//!   - calculate alt-allele dosage in a mutli-sample VCF / BCF
19//!
20//! ```
21//! use crate::rust_htslib::bcf::{Reader, Read};
22//! use std::convert::TryFrom;
23//!
24//! let path = &"test/test_string.vcf";
25//! let mut bcf = Reader::from_path(path).expect("Error opening file.");
26//! // iterate through each row of the vcf body.
27//! for (i, record_result) in bcf.records().enumerate() {
28//!     let mut record = record_result.expect("Fail to read record");
29//!     let mut s = String::new();
30//!      for allele in record.alleles() {
31//!          for c in allele {
32//!              s.push(char::from(*c))
33//!          }
34//!          s.push(' ')
35//!      }
36//!     // 0-based position and the list of alleles
37//!     println!("Locus: {}, Alleles: {}", record.pos(), s);
38//!     // number of sample in the vcf
39//!     let sample_count = usize::try_from(record.sample_count()).unwrap();
40//!
41//!     // Counting ref, alt and missing alleles for each sample
42//!     let mut n_ref = vec![0; sample_count];
43//!     let mut n_alt = vec![0; sample_count];
44//!     let mut n_missing = vec![0; sample_count];
45//!     let gts = record.genotypes().expect("Error reading genotypes");
46//!     for sample_index in 0..sample_count {
47//!         // for each sample
48//!         for gta in gts.get(sample_index).iter() {
49//!             // for each allele
50//!             match gta.index() {
51//!                 Some(0) => n_ref[sample_index] += 1,  // reference allele
52//!                 Some(_) => n_alt[sample_index] += 1,  // alt allele
53//!                 None => n_missing[sample_index] += 1, // missing allele
54//!             }
55//!         }
56//!     }
57//! }
58//! ```
59//!
60//! # Example (writing)
61//!
62//!   - Setting up a VCF writer from scratch (including a simple header)
63//!   - Creating a VCF record and writing it to the VCF file
64//!
65//! ```
66//! use rust_htslib::bcf::{Format, Writer};
67//! use rust_htslib::bcf::header::Header;
68//! use rust_htslib::bcf::record::GenotypeAllele;
69//!
70//! // Create minimal VCF header with a single contig and a single sample
71//! let mut header = Header::new();
72//! let header_contig_line = r#"##contig=<ID=1,length=10>"#;
73//! header.push_record(header_contig_line.as_bytes());
74//! let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
75//! header.push_record(header_gt_line.as_bytes());
76//! header.push_sample("test_sample".as_bytes());
77//!
78//! // Write uncompressed VCF to stdout with above header and get an empty record
79//! let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
80//! let mut record = vcf.empty_record();
81//!
82//! // Set chrom and pos to 1 and 7, respectively - note the 0-based positions
83//! let rid = vcf.header().name2rid(b"1").unwrap();
84//! record.set_rid(Some(rid));
85//! record.set_pos(6);
86//!
87//! // Set record genotype to 0|1 - note first allele is always unphased
88//! let alleles = &[GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)];
89//! record.push_genotypes(alleles).unwrap();
90//!
91//! // Write record
92//! vcf.write(&record).unwrap()
93//! ```
94//!
95//! This will print the following VCF to stdout:
96//!
97//! ```lang-none
98//! ##fileformat=VCFv4.2
99//! ##FILTER=<ID=PASS,Description="All filters passed">
100//! ##contig=<ID=1,length=10>
101//! ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
102//! #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  test_sample
103//! 1       7       .       .       .       0       .       .       GT      0|1
104//! ```
105
106use std::ffi;
107use std::path::Path;
108use std::str;
109use std::sync::Arc;
110
111use url::Url;
112
113pub mod buffer;
114pub mod header;
115pub mod index;
116pub mod record;
117
118use crate::bcf::header::{HeaderView, SampleSubset};
119use crate::errors::{Error, Result};
120use crate::htslib;
121
122pub use crate::bcf::header::{Header, HeaderRecord};
123pub use crate::bcf::record::Record;
124
125/// A trait for a BCF reader with a read method.
126pub trait Read: Sized {
127    /// Read the next record.
128    ///
129    /// # Arguments
130    /// * record - an empty record, that can be created with `bcf::Reader::empty_record`.
131    ///
132    /// # Returns
133    /// None if end of file was reached, otherwise Some will contain
134    /// a result with an error in case of failure.
135    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
136
137    /// Return an iterator over all records of the VCF/BCF file.
138    fn records(&mut self) -> Records<'_, Self>;
139
140    /// Return the header.
141    fn header(&self) -> &HeaderView;
142
143    /// Return empty record.  Can be reused multiple times.
144    fn empty_record(&self) -> Record;
145
146    /// Activate multi-threaded BCF/VCF read support in htslib. This should permit faster
147    /// reading of large VCF files.
148    ///
149    /// Setting `nthreads` to `0` does not change the current state.  Note that it is not
150    /// possible to set the number of background threads below `1` once it has been set.
151    ///
152    /// # Arguments
153    ///
154    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
155    fn set_threads(&mut self, n_threads: usize) -> Result<()>;
156}
157
158/// A VCF/BCF reader.
159#[derive(Debug)]
160pub struct Reader {
161    inner: *mut htslib::htsFile,
162    header: Arc<HeaderView>,
163}
164
165unsafe impl Send for Reader {}
166
167/// # Safety
168///
169/// Implementation for `Reader::set_threads()` and `Writer::set_threads`.
170pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
171    assert!(n_threads > 0, "n_threads must be > 0");
172
173    let r = htslib::hts_set_threads(hts_file, n_threads as i32);
174    if r != 0 {
175        Err(Error::SetThreads)
176    } else {
177        Ok(())
178    }
179}
180
181impl Reader {
182    /// Create a new reader from a given path.
183    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
184        match path.as_ref().to_str() {
185            Some(p) if !path.as_ref().exists() => Err(Error::FileNotFound { path: p.into() }),
186            Some(p) => Self::new(p.as_bytes()),
187            _ => Err(Error::NonUnicodePath),
188        }
189    }
190
191    /// Create a new reader from a given URL.
192    pub fn from_url(url: &Url) -> Result<Self> {
193        Self::new(url.as_str().as_bytes())
194    }
195
196    /// Create a new reader from standard input.
197    pub fn from_stdin() -> Result<Self> {
198        Self::new(b"-")
199    }
200
201    fn new(path: &[u8]) -> Result<Self> {
202        let htsfile = bcf_open(path, b"r")?;
203        let header = unsafe { htslib::bcf_hdr_read(htsfile) };
204        Ok(Reader {
205            inner: htsfile,
206            header: Arc::new(unsafe { HeaderView::from_ptr(header) }),
207        })
208    }
209}
210
211impl Read for Reader {
212    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
213        match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
214            0 => {
215                unsafe {
216                    // Always unpack record.
217                    htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
218                }
219                record.set_header(Arc::clone(&self.header));
220                Some(Ok(()))
221            }
222            -1 => None,
223            _ => Some(Err(Error::BcfInvalidRecord)),
224        }
225    }
226
227    fn records(&mut self) -> Records<'_, Self> {
228        Records::new(self)
229    }
230
231    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
232        unsafe { set_threads(self.inner, n_threads) }
233    }
234
235    fn header(&self) -> &HeaderView {
236        &self.header
237    }
238
239    /// Return empty record.  Can be reused multiple times.
240    fn empty_record(&self) -> Record {
241        Record::new(self.header.clone())
242    }
243}
244
245impl Drop for Reader {
246    fn drop(&mut self) {
247        unsafe {
248            htslib::hts_close(self.inner);
249        }
250    }
251}
252
253/// An indexed VCF/BCF reader.
254#[derive(Debug)]
255pub struct IndexedReader {
256    /// The synced VCF/BCF reader to use internally.
257    inner: *mut htslib::bcf_srs_t,
258    /// The header.
259    header: Arc<HeaderView>,
260
261    /// The position of the previous fetch, if any.
262    current_region: Option<(u32, u64, Option<u64>)>,
263}
264
265unsafe impl Send for IndexedReader {}
266
267impl IndexedReader {
268    /// Create a new `IndexedReader` from path.
269    ///
270    /// # Arguments
271    ///
272    /// * `path` - the path to open.
273    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
274        let path = path.as_ref();
275        match path.to_str() {
276            Some(p) if path.exists() => {
277                Self::new(&ffi::CString::new(p).map_err(|_| Error::NonUnicodePath)?)
278            }
279            Some(p) => Err(Error::FileNotFound { path: p.into() }),
280            None => Err(Error::NonUnicodePath),
281        }
282    }
283
284    /// Create a new `IndexedReader` from an URL.
285    pub fn from_url(url: &Url) -> Result<Self> {
286        Self::new(&ffi::CString::new(url.as_str()).unwrap())
287    }
288
289    /// Create a new `IndexedReader`.
290    ///
291    /// # Arguments
292    ///
293    /// * `path` - the path. Use "-" for stdin.
294    fn new(path: &ffi::CStr) -> Result<Self> {
295        // Create reader and require existence of index file.
296        let ser_reader = unsafe { htslib::bcf_sr_init() };
297        unsafe {
298            htslib::bcf_sr_set_opt(ser_reader, 0);
299        } // 0: BCF_SR_REQUIRE_IDX
300          // Attach a file with the path from the arguments.
301        if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
302            let header = Arc::new(unsafe {
303                HeaderView::from_ptr(htslib::bcf_hdr_dup(
304                    (*(*ser_reader).readers.offset(0)).header,
305                ))
306            });
307            Ok(IndexedReader {
308                inner: ser_reader,
309                header,
310                current_region: None,
311            })
312        } else {
313            Err(Error::BcfOpen {
314                target: path.to_str().unwrap().to_owned(),
315            })
316        }
317    }
318
319    /// Jump to the given region.
320    ///
321    /// # Arguments
322    ///
323    /// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
324    ///   contig name to ID.
325    /// * `start` - `0`-based **inclusive** start coordinate of region on reference.
326    /// * `end` - Optional `0`-based **inclusive** end coordinate of region on reference. If `None`
327    ///   is given, records are fetched from `start` until the end of the contig.
328    ///
329    /// # Note
330    /// The entire contig can be fetched by setting `start` to `0` and `end` to `None`.
331    pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
332        let contig = self.header.rid2name(rid)?;
333        let contig = ffi::CString::new(contig).unwrap();
334        if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
335            Err(Error::GenomicSeek {
336                contig: contig.to_str().unwrap().to_owned(),
337                start,
338            })
339        } else {
340            self.current_region = Some((rid, start, end));
341            Ok(())
342        }
343    }
344}
345
346impl Read for IndexedReader {
347    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
348        match unsafe { htslib::bcf_sr_next_line(self.inner) } {
349            0 => {
350                if unsafe { (*self.inner).errnum } != 0 {
351                    Some(Err(Error::BcfInvalidRecord))
352                } else {
353                    None
354                }
355            }
356            i => {
357                assert!(i > 0, "Must not be negative");
358                // Note that the sync BCF reader has a different interface than the others
359                // as it keeps its own buffer already for each record.  An alternative here
360                // would be to replace the `inner` value by an enum that can be a pointer
361                // into a synced reader or an owning popinter to an allocated record.
362                unsafe {
363                    htslib::bcf_copy(
364                        record.inner,
365                        *(*(*self.inner).readers.offset(0)).buffer.offset(0),
366                    );
367                }
368
369                unsafe {
370                    // Always unpack record.
371                    htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
372                }
373
374                record.set_header(Arc::clone(&self.header));
375
376                match self.current_region {
377                    Some((rid, _start, end)) => {
378                        let endpos = end.unwrap_or(u64::MAX);
379                        if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
380                            Some(Ok(()))
381                        } else {
382                            None
383                        }
384                    }
385                    None => Some(Ok(())),
386                }
387            }
388        }
389    }
390
391    fn records(&mut self) -> Records<'_, Self> {
392        Records::new(self)
393    }
394
395    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
396        assert!(n_threads > 0, "n_threads must be > 0");
397
398        let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
399        if r != 0 {
400            Err(Error::SetThreads)
401        } else {
402            Ok(())
403        }
404    }
405
406    fn header(&self) -> &HeaderView {
407        &self.header
408    }
409
410    fn empty_record(&self) -> Record {
411        Record::new(self.header.clone())
412    }
413}
414
415impl Drop for IndexedReader {
416    fn drop(&mut self) {
417        unsafe { htslib::bcf_sr_destroy(self.inner) };
418    }
419}
420
421/// This module contains the `SyncedReader` class and related code.
422pub mod synced {
423
424    use super::*;
425
426    /// This module contains bitmask constants for `SyncedReader`.
427    pub mod pairing {
428        /// Allow different alleles, as long as they all are SNPs.
429        pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
430        /// The same as above, but with indels.
431        pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
432        /// Any combination of alleles can be returned by `bcf_sr_next_line()`.
433        pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
434        /// At least some of multiallelic ALTs must match.  Implied by all the others with the exception of `EXACT`.
435        pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
436        /// Allow REF-only records with SNPs.
437        pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
438        /// Allow REF-only records with indels.
439        pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
440        /// Require the exact same set of alleles in all files.
441        pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
442        /// `SNPS | INDELS`.
443        pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
444        /// `SNPS | INDELS | SNP_REF | INDEL_REF`.
445        pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
446    }
447
448    /// A wrapper for `bcf_srs_t`; allows joint traversal of multiple VCF and/or BCF files.
449    #[derive(Debug)]
450    pub struct SyncedReader {
451        /// Internal handle for the synced reader.
452        inner: *mut crate::htslib::bcf_srs_t,
453
454        /// Arcs of `HeaderView`s of the readers.
455        headers: Vec<Arc<HeaderView>>,
456
457        /// The position of the previous fetch, if any.
458        current_region: Option<(u32, u64, u64)>,
459    }
460
461    // TODO: add interface for setting threads, ensure that the pool is freed properly
462    impl SyncedReader {
463        pub fn new() -> Result<Self> {
464            let inner = unsafe { crate::htslib::bcf_sr_init() };
465            if inner.is_null() {
466                return Err(Error::BcfAllocationError);
467            }
468
469            Ok(SyncedReader {
470                inner,
471                headers: Vec::new(),
472                current_region: None,
473            })
474        }
475
476        /// Enable or disable requiring of index
477        pub fn set_require_index(&mut self, do_require: bool) {
478            unsafe {
479                (*self.inner).require_index = if do_require { 1 } else { 0 };
480            }
481        }
482
483        /// Set the given bitmask of values from `sr_pairing` module.
484        pub fn set_pairing(&mut self, bitmask: u32) {
485            unsafe {
486                // TODO: 1 actually is BCF_SR_PAIR_LOGIC but is not available here?
487                crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
488            }
489        }
490
491        /// Add new reader with the path to the file.
492        pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
493            match path.as_ref().to_str() {
494                Some(p) if path.as_ref().exists() => {
495                    let p_cstring = ffi::CString::new(p).unwrap();
496                    let res =
497                        unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
498
499                    if res == 0 {
500                        return Err(Error::BcfOpen {
501                            target: p.to_owned(),
502                        });
503                    }
504
505                    let i = (self.reader_count() - 1) as isize;
506                    let header = Arc::new(unsafe {
507                        HeaderView::from_ptr(crate::htslib::bcf_hdr_dup(
508                            (*(*self.inner).readers.offset(i)).header,
509                        ))
510                    });
511                    self.headers.push(header);
512                    Ok(())
513                }
514                _ => Err(Error::NonUnicodePath),
515            }
516        }
517
518        /// Remove reader with the given index.
519        pub fn remove_reader(&mut self, idx: u32) {
520            if idx >= self.reader_count() {
521                panic!("Invalid reader!");
522            } else {
523                unsafe {
524                    crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
525                }
526                self.headers.remove(idx as usize);
527            }
528        }
529
530        /// Return number of open files/readers.
531        pub fn reader_count(&self) -> u32 {
532            unsafe { (*self.inner).nreaders as u32 }
533        }
534
535        /// Read next line and return number of readers that have the given line (0 if end of all files is reached).
536        pub fn read_next(&mut self) -> Result<u32> {
537            let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
538
539            if num == 0 {
540                if unsafe { (*self.inner).errnum } != 0 {
541                    return Err(Error::BcfInvalidRecord);
542                }
543                Ok(0)
544            } else {
545                assert!(num > 0, "num returned by htslib must not be negative");
546                match self.current_region {
547                    Some((rid, _start, end)) => {
548                        for idx in 0..self.reader_count() {
549                            if !self.has_line(idx) {
550                                continue;
551                            }
552                            unsafe {
553                                let record = *(*(*self.inner).readers.offset(idx as isize))
554                                    .buffer
555                                    .offset(0);
556                                if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
557                                    return Ok(0);
558                                }
559                            }
560                        }
561                        Ok(num)
562                    }
563                    None => Ok(num),
564                }
565            }
566        }
567
568        /// Return whether the given reader has the line.
569        pub fn has_line(&self, idx: u32) -> bool {
570            if idx >= self.reader_count() {
571                panic!("Invalid reader!");
572            } else {
573                unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
574            }
575        }
576
577        /// Return record from the given reader, if any.
578        pub fn record(&self, idx: u32) -> Option<Record> {
579            if self.has_line(idx) {
580                let record = Record::new(self.headers[idx as usize].clone());
581                unsafe {
582                    crate::htslib::bcf_copy(
583                        record.inner,
584                        *(*(*self.inner).readers.offset(idx as isize))
585                            .buffer
586                            .offset(0),
587                    );
588                }
589                Some(record)
590            } else {
591                None
592            }
593        }
594
595        /// Return header from the given reader.
596        pub fn header(&self, idx: u32) -> &HeaderView {
597            // TODO: is the mutability here correct?
598            if idx >= self.reader_count() {
599                panic!("Invalid reader!");
600            } else {
601                &self.headers[idx as usize]
602            }
603        }
604
605        /// Jump to the given region.
606        ///
607        /// # Arguments
608        ///
609        /// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
610        ///   contig name to ID.
611        /// * `start` - `0`-based start coordinate of region on reference.
612        /// * `end` - `0`-based end coordinate of region on reference.
613        pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
614            let contig = {
615                let contig = self.header(0).rid2name(rid).unwrap(); //.clone();
616                ffi::CString::new(contig).unwrap()
617            };
618            if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
619                Err(Error::GenomicSeek {
620                    contig: contig.to_str().unwrap().to_owned(),
621                    start,
622                })
623            } else {
624                self.current_region = Some((rid, start, end));
625                Ok(())
626            }
627        }
628    }
629
630    impl Drop for SyncedReader {
631        fn drop(&mut self) {
632            unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
633        }
634    }
635}
636
637#[derive(Clone, Copy, Debug, PartialEq, Eq)]
638pub enum Format {
639    Vcf,
640    Bcf,
641}
642
643/// A VCF/BCF writer.
644#[derive(Debug)]
645pub struct Writer {
646    inner: *mut htslib::htsFile,
647    header: Arc<HeaderView>,
648    subset: Option<SampleSubset>,
649}
650
651unsafe impl Send for Writer {}
652
653impl Writer {
654    /// Create a new writer that writes to the given path.
655    ///
656    /// # Arguments
657    ///
658    /// * `path` - the path
659    /// * `header` - header definition to use
660    /// * `uncompressed` - disable compression
661    /// * `vcf` - write VCF instead of BCF
662    pub fn from_path<P: AsRef<Path>>(
663        path: P,
664        header: &Header,
665        uncompressed: bool,
666        format: Format,
667    ) -> Result<Self> {
668        if let Some(p) = path.as_ref().to_str() {
669            Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
670        } else {
671            Err(Error::NonUnicodePath)
672        }
673    }
674
675    /// Create a new writer from a URL.
676    ///
677    /// # Arguments
678    ///
679    /// * `url` - the URL
680    /// * `header` - header definition to use
681    /// * `uncompressed` - disable compression
682    /// * `vcf` - write VCF instead of BCF
683    pub fn from_url(
684        url: &Url,
685        header: &Header,
686        uncompressed: bool,
687        format: Format,
688    ) -> Result<Self> {
689        Self::new(url.as_str().as_bytes(), header, uncompressed, format)
690    }
691
692    /// Create a new writer to stdout.
693    ///
694    /// # Arguments
695    ///
696    /// * `header` - header definition to use
697    /// * `uncompressed` - disable compression
698    /// * `vcf` - write VCF instead of BCF
699    pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
700        Self::new(b"-", header, uncompressed, format)
701    }
702
703    fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
704        let mode: &[u8] = match (uncompressed, format) {
705            (true, Format::Vcf) => b"w",
706            (false, Format::Vcf) => b"wz",
707            (true, Format::Bcf) => b"wbu",
708            (false, Format::Bcf) => b"wb",
709        };
710
711        let htsfile = bcf_open(path, mode)?;
712        unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
713        Ok(Writer {
714            inner: htsfile,
715            header: Arc::new(unsafe { HeaderView::from_ptr(htslib::bcf_hdr_dup(header.inner)) }),
716            subset: header.subset.clone(),
717        })
718    }
719
720    /// Obtain reference to the lightweight `HeaderView` of the BCF header.
721    pub fn header(&self) -> &HeaderView {
722        &self.header
723    }
724
725    /// Create empty record for writing to this writer.
726    ///
727    /// This record can then be reused multiple times.
728    pub fn empty_record(&self) -> Record {
729        Record::new(self.header.clone())
730    }
731
732    /// Translate record to header of this writer.
733    ///
734    /// # Arguments
735    ///
736    /// - `record` - The `Record` to translate.
737    pub fn translate(&mut self, record: &mut record::Record) {
738        unsafe {
739            htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
740        }
741        record.set_header(Arc::clone(&self.header));
742    }
743
744    /// Subset samples of record to match header of this writer.
745    ///
746    /// # Arguments
747    ///
748    /// - `record` - The `Record` to modify.
749    pub fn subset(&mut self, record: &mut record::Record) {
750        if let Some(ref mut subset) = self.subset {
751            unsafe {
752                htslib::bcf_subset(
753                    self.header.inner,
754                    record.inner,
755                    subset.len() as i32,
756                    subset.as_mut_ptr(),
757                );
758            }
759        }
760    }
761
762    /// Write `record` to the Writer.
763    ///
764    /// # Arguments
765    ///
766    /// - `record` - The `Record` to write.
767    pub fn write(&mut self, record: &record::Record) -> Result<()> {
768        if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
769            Err(Error::WriteRecord)
770        } else {
771            Ok(())
772        }
773    }
774
775    /// Activate multi-threaded BCF write support in htslib. This should permit faster
776    /// writing of large BCF files.
777    ///
778    /// # Arguments
779    ///
780    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
781    pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
782        unsafe { set_threads(self.inner, n_threads) }
783    }
784}
785
786impl Drop for Writer {
787    fn drop(&mut self) {
788        unsafe {
789            htslib::hts_close(self.inner);
790        }
791    }
792}
793
794#[derive(Debug)]
795pub struct Records<'a, R: Read> {
796    reader: &'a mut R,
797}
798
799impl<'a, R: Read> Records<'a, R> {
800    pub fn new(reader: &'a mut R) -> Self {
801        Self { reader }
802    }
803}
804
805impl<R: Read> Iterator for Records<'_, R> {
806    type Item = Result<record::Record>;
807
808    fn next(&mut self) -> Option<Result<record::Record>> {
809        let mut record = self.reader.empty_record();
810        match self.reader.read(&mut record) {
811            Some(Err(e)) => Some(Err(e)),
812            Some(Ok(_)) => Some(Ok(record)),
813            None => None,
814        }
815    }
816}
817
818/// Wrapper for opening a BCF file.
819fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
820    let p = ffi::CString::new(target).unwrap();
821    let c_str = ffi::CString::new(mode).unwrap();
822    let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
823
824    if ret.is_null() {
825        return Err(Error::BcfOpen {
826            target: str::from_utf8(target).unwrap().to_owned(),
827        });
828    }
829
830    unsafe {
831        if !(mode.contains(&b'w')
832            || (*ret).format.category == htslib::htsFormatCategory_variant_data)
833        {
834            return Err(Error::BcfOpen {
835                target: str::from_utf8(target).unwrap().to_owned(),
836            });
837        }
838    }
839    Ok(ret)
840}
841
842#[cfg(test)]
843mod tests {
844    use tempfile::NamedTempFile;
845
846    use super::record::Buffer;
847    use super::*;
848    use crate::bcf::header::Id;
849    use crate::bcf::record::GenotypeAllele;
850    use crate::bcf::record::Numeric;
851    use crate::bcf::Reader;
852    use std::convert::TryFrom;
853    use std::fs::File;
854    use std::io::prelude::Read as IoRead;
855    use std::path::Path;
856    use std::str;
857
858    fn _test_read<P: AsRef<Path>>(path: &P) {
859        let mut bcf = Reader::from_path(path).expect("Error opening file.");
860        assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
861
862        for (i, rec) in bcf.records().enumerate() {
863            let record = rec.expect("Error reading record.");
864            assert_eq!(record.sample_count(), 1);
865
866            assert_eq!(record.rid().expect("Error reading rid."), 0);
867            assert_eq!(record.pos(), 10021 + i as i64);
868            assert!((record.qual() - 0f32).abs() < f32::EPSILON);
869            let mut buffer = Buffer::new();
870            assert!(
871                (record
872                    .info_shared_buffer(b"MQ0F", &mut buffer)
873                    .float()
874                    .expect("Error reading info.")
875                    .expect("Missing tag")[0]
876                    - 1.0)
877                    .abs()
878                    < f32::EPSILON
879            );
880            if i == 59 {
881                assert!(
882                    (record
883                        .info_shared_buffer(b"SGB", &mut buffer)
884                        .float()
885                        .expect("Error reading info.")
886                        .expect("Missing tag")[0]
887                        - -0.379885)
888                        .abs()
889                        < f32::EPSILON
890                );
891            }
892            // the artificial "not observed" allele is present in each record.
893            assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
894
895            let fmt = record.format(b"PL");
896            let pl = fmt.integer().expect("Error reading format.");
897            assert_eq!(pl.len(), 1);
898            if i == 59 {
899                assert_eq!(pl[0].len(), 6);
900            } else {
901                assert_eq!(pl[0].len(), 3);
902            }
903        }
904    }
905
906    #[test]
907    fn test_read() {
908        _test_read(&"test/test.bcf");
909    }
910
911    #[test]
912    fn test_reader_set_threads() {
913        let path = &"test/test.bcf";
914        let mut bcf = Reader::from_path(path).expect("Error opening file.");
915        bcf.set_threads(2).unwrap();
916    }
917
918    #[test]
919    fn test_writer_set_threads() {
920        let path = &"test/test.bcf";
921        let tmp = tempfile::Builder::new()
922            .prefix("rust-htslib")
923            .tempdir()
924            .expect("Cannot create temp dir");
925        let bcfpath = tmp.path().join("test.bcf");
926        let bcf = Reader::from_path(path).expect("Error opening file.");
927        let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
928            .expect("Error subsetting samples.");
929        let mut writer =
930            Writer::from_path(&bcfpath, &header, false, Format::Bcf).expect("Error opening file.");
931        writer.set_threads(2).unwrap();
932    }
933
934    #[test]
935    fn test_fetch() {
936        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
937        bcf.set_threads(2).unwrap();
938        let rid = bcf
939            .header()
940            .name2rid(b"1")
941            .expect("Translating from contig '1' to ID failed.");
942        bcf.fetch(rid, 10_033, Some(10_060))
943            .expect("Fetching failed");
944        assert_eq!(bcf.records().count(), 28);
945    }
946
947    #[test]
948    fn test_fetch_all() {
949        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
950        bcf.set_threads(2).unwrap();
951        let rid = bcf
952            .header()
953            .name2rid(b"1")
954            .expect("Translating from contig '1' to ID failed.");
955        bcf.fetch(rid, 0, None).expect("Fetching failed");
956        assert_eq!(bcf.records().count(), 62);
957    }
958
959    #[test]
960    fn test_fetch_open_ended() {
961        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
962        bcf.set_threads(2).unwrap();
963        let rid = bcf
964            .header()
965            .name2rid(b"1")
966            .expect("Translating from contig '1' to ID failed.");
967        bcf.fetch(rid, 10077, None).expect("Fetching failed");
968        assert_eq!(bcf.records().count(), 6);
969    }
970
971    #[test]
972    fn test_fetch_start_greater_than_last_vcf_pos() {
973        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
974        bcf.set_threads(2).unwrap();
975        let rid = bcf
976            .header()
977            .name2rid(b"1")
978            .expect("Translating from contig '1' to ID failed.");
979        bcf.fetch(rid, 20077, None).expect("Fetching failed");
980        assert_eq!(bcf.records().count(), 0);
981    }
982
983    #[test]
984    fn test_write() {
985        let mut bcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
986        let tmp = tempfile::Builder::new()
987            .prefix("rust-htslib")
988            .tempdir()
989            .expect("Cannot create temp dir");
990        let bcfpath = tmp.path().join("test.bcf");
991        println!("{:?}", bcfpath);
992        {
993            let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
994                .expect("Error subsetting samples.");
995            let mut writer = Writer::from_path(&bcfpath, &header, false, Format::Bcf)
996                .expect("Error opening file.");
997            for rec in bcf.records() {
998                let mut record = rec.expect("Error reading record.");
999                writer.translate(&mut record);
1000                writer.subset(&mut record);
1001                record.trim_alleles().expect("Error trimming alleles.");
1002                writer.write(&record).expect("Error writing record");
1003            }
1004        }
1005        {
1006            _test_read(&bcfpath);
1007        }
1008        tmp.close().expect("Failed to delete temp dir");
1009    }
1010
1011    #[test]
1012    fn test_strings() {
1013        let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1014        let fs1 = [
1015            &b"LongString1"[..],
1016            &b"LongString2"[..],
1017            &b"."[..],
1018            &b"LongString4"[..],
1019            &b"evenlength"[..],
1020            &b"ss6"[..],
1021        ];
1022        let mut buffer = Buffer::new();
1023        for (i, rec) in vcf.records().enumerate() {
1024            println!("record {}", i);
1025            let record = rec.expect("Error reading record.");
1026            assert_eq!(
1027                record
1028                    .info_shared_buffer(b"S1", &mut buffer)
1029                    .string()
1030                    .expect("Error reading string.")
1031                    .expect("Missing tag")[0],
1032                format!("string{}", i + 1).as_bytes()
1033            );
1034            let fs1_str_vec = record
1035                .format_shared_buffer(b"FS1", &mut buffer)
1036                .string()
1037                .expect("Error reading string.");
1038            assert_eq!(fs1_str_vec.len(), 2);
1039            println!("{}", String::from_utf8_lossy(fs1_str_vec[0]));
1040            assert_eq!(
1041                record
1042                    .format(b"FS1")
1043                    .string()
1044                    .expect("Error reading string.")[0],
1045                fs1[i]
1046            );
1047        }
1048    }
1049
1050    #[test]
1051    fn test_missing() {
1052        let mut vcf = Reader::from_path("test/test_missing.vcf").expect("Error opening file.");
1053        let fn4 = [
1054            &[
1055                i32::missing(),
1056                i32::missing(),
1057                i32::missing(),
1058                i32::missing(),
1059            ][..],
1060            &[i32::missing()][..],
1061        ];
1062        let f1 = [false, true];
1063        let mut buffer = Buffer::new();
1064        for (i, rec) in vcf.records().enumerate() {
1065            let record = rec.expect("Error reading record.");
1066            assert_eq!(
1067                record
1068                    .info_shared_buffer(b"F1", &mut buffer)
1069                    .float()
1070                    .expect("Error reading float.")
1071                    .expect("Missing tag")[0]
1072                    .is_nan(),
1073                f1[i]
1074            );
1075            assert_eq!(
1076                record
1077                    .format(b"FN4")
1078                    .integer()
1079                    .expect("Error reading integer.")[1],
1080                fn4[i]
1081            );
1082            assert!(
1083                record.format(b"FF4").float().expect("Error reading float.")[1]
1084                    .iter()
1085                    .all(|&v| v.is_missing())
1086            );
1087        }
1088    }
1089
1090    #[test]
1091    fn test_genotypes() {
1092        let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1093        let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
1094        for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
1095            let rec = rec.expect("Error reading record.");
1096            let genotypes = rec.genotypes().expect("Error reading genotypes");
1097            assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
1098        }
1099    }
1100
1101    #[test]
1102    fn test_genotypes_read_mixed_ploidy() {
1103        let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1104
1105        // Expected genotypes for comparison
1106        let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1107
1108        for (rec, exp_gts) in vcf.records().zip(expected.iter()) {
1109            let rec = rec.expect("Error reading record.");
1110
1111            // Get the genotypes from the record
1112            let genotypes = rec.genotypes().expect("Error reading genotypes");
1113
1114            // Compare each genotype with the expected value
1115            for (sample, exp_gt) in exp_gts.iter().enumerate() {
1116                assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1117            }
1118        }
1119    }
1120
1121    #[test]
1122    fn test_genotypes_write_and_read_mixed_ploidy() {
1123        let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1124
1125        // Create a temporary file to write the modified VCF data
1126        let tmp = NamedTempFile::new().unwrap();
1127        let path = tmp.path();
1128
1129        {
1130            // Create a VCF writer with the same header as the input VCF
1131            let mut writer = Writer::from_path(
1132                path,
1133                &Header::from_template(vcf.header()),
1134                true,
1135                Format::Vcf,
1136            )
1137            .unwrap();
1138
1139            // Modify record template by adding different genotypes and write the to the temp file.
1140            let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1141            rec_tpl
1142                .push_genotype_structured(
1143                    &[
1144                        vec![GenotypeAllele::Unphased(0)],
1145                        vec![GenotypeAllele::Unphased(1)],
1146                    ],
1147                    3,
1148                )
1149                .unwrap();
1150            writer.write(&rec_tpl).unwrap();
1151            rec_tpl
1152                .push_genotype_structured(
1153                    &[
1154                        vec![GenotypeAllele::Unphased(0), GenotypeAllele::Unphased(1)],
1155                        vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
1156                    ],
1157                    3,
1158                )
1159                .unwrap();
1160            writer.write(&rec_tpl).unwrap();
1161            rec_tpl
1162                .push_genotype_structured(
1163                    &[
1164                        vec![GenotypeAllele::Unphased(1), GenotypeAllele::Phased(0)],
1165                        vec![
1166                            GenotypeAllele::Unphased(1),
1167                            GenotypeAllele::Unphased(1),
1168                            GenotypeAllele::Phased(0),
1169                        ],
1170                    ],
1171                    3,
1172                )
1173                .unwrap();
1174            writer.write(&rec_tpl).unwrap();
1175        }
1176
1177        // Read back the temporary file with the modified VCF data
1178        let mut reader = Reader::from_path(path).unwrap();
1179
1180        // Expected genotypes for validation
1181        let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1182
1183        // Iterate over the records in the temporary file and validate the genotypes
1184        for (rec, exp_gts) in reader.records().zip(expected.iter()) {
1185            let rec = rec.expect("Error reading record");
1186            let genotypes = rec.genotypes().expect("Error reading genotypes");
1187
1188            // Compare each genotype with the expected value
1189            for (sample, exp_gt) in exp_gts.iter().enumerate() {
1190                assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1191            }
1192        }
1193    }
1194
1195    #[test]
1196    fn test_genotypes_wrong_max_ploidy() {
1197        let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1198
1199        // Modify record template by adding different genotypes and write the to the temp file.
1200        let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1201        let err = rec_tpl
1202            .push_genotype_structured(
1203                &[
1204                    vec![
1205                        GenotypeAllele::Unphased(0),
1206                        GenotypeAllele::Unphased(1),
1207                        GenotypeAllele::Unphased(0),
1208                    ],
1209                    vec![
1210                        GenotypeAllele::Unphased(1),
1211                        GenotypeAllele::Unphased(0),
1212                        GenotypeAllele::Unphased(1),
1213                        GenotypeAllele::Unphased(0),
1214                    ],
1215                ],
1216                3,
1217            )
1218            .expect_err(
1219                "This should fail since there are more alleles specified (4 for second sample) than max_ploidy (3) suggests",
1220            );
1221        assert_eq!(err, crate::errors::Error::BcfSetValues);
1222    }
1223
1224    #[test]
1225    fn test_header_ids() {
1226        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1227        let header = &vcf.header();
1228        use crate::bcf::header::Id;
1229
1230        assert_eq!(header.id_to_name(Id(4)), b"GT");
1231        assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
1232        assert!(header.name_to_id(b"XX").is_err());
1233    }
1234
1235    #[test]
1236    fn test_header_samples() {
1237        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1238        let header = &vcf.header();
1239
1240        assert_eq!(header.id_to_sample(Id(0)), b"one");
1241        assert_eq!(header.id_to_sample(Id(1)), b"two");
1242        assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
1243        assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
1244        assert!(header.sample_to_id(b"three").is_err());
1245    }
1246
1247    #[test]
1248    fn test_header_contigs() {
1249        let vcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
1250        let header = &vcf.header();
1251
1252        assert_eq!(header.contig_count(), 86);
1253
1254        // test existing contig names and IDs
1255        assert_eq!(header.rid2name(0).unwrap(), b"1");
1256        assert_eq!(header.name2rid(b"1").unwrap(), 0);
1257
1258        assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
1259        assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
1260
1261        // test nonexistent contig names and IDs
1262        assert!(header.name2rid(b"nonexistent_contig").is_err());
1263        assert!(header.rid2name(100).is_err());
1264    }
1265
1266    #[test]
1267    fn test_header_records() {
1268        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1269        let records = vcf.header().header_records();
1270        assert_eq!(records.len(), 10);
1271
1272        match records[1] {
1273            HeaderRecord::Filter {
1274                ref key,
1275                ref values,
1276            } => {
1277                assert_eq!(key, "FILTER");
1278                assert_eq!(values["ID"], "PASS");
1279            }
1280            _ => {
1281                panic!("Invalid HeaderRecord");
1282            }
1283        }
1284    }
1285
1286    #[test]
1287    fn test_header_info_types() {
1288        let vcf = Reader::from_path("test/test.bcf").unwrap();
1289        let header = vcf.header();
1290        let truth = vec![
1291            (
1292                // INFO=<ID=INDEL,Number=0,Type=Flag>
1293                "INDEL",
1294                header::TagType::Flag,
1295                header::TagLength::Fixed(0),
1296            ),
1297            (
1298                // INFO=<ID=DP,Number=1,Type=Integer>
1299                "DP",
1300                header::TagType::Integer,
1301                header::TagLength::Fixed(1),
1302            ),
1303            (
1304                // INFO=<ID=QS,Number=R,Type=Float>
1305                "QS",
1306                header::TagType::Float,
1307                header::TagLength::Alleles,
1308            ),
1309            (
1310                // INFO=<ID=I16,Number=16,Type=Float>
1311                "I16",
1312                header::TagType::Float,
1313                header::TagLength::Fixed(16),
1314            ),
1315        ];
1316        for (ref_name, ref_type, ref_length) in truth {
1317            let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1318            assert_eq!(tag_type, ref_type);
1319            assert_eq!(tag_length, ref_length);
1320        }
1321
1322        let vcf = Reader::from_path("test/test_svlen.vcf").unwrap();
1323        let header = vcf.header();
1324        let truth = vec![
1325            (
1326                // INFO=<ID=IMPRECISE,Number=0,Type=Flag>
1327                "IMPRECISE",
1328                header::TagType::Flag,
1329                header::TagLength::Fixed(0),
1330            ),
1331            (
1332                // INFO=<ID=SVTYPE,Number=1,Type=String>
1333                "SVTYPE",
1334                header::TagType::String,
1335                header::TagLength::Fixed(1),
1336            ),
1337            (
1338                // INFO=<ID=SVLEN,Number=.,Type=Integer>
1339                "SVLEN",
1340                header::TagType::Integer,
1341                header::TagLength::Variable,
1342            ),
1343            (
1344                // INFO<ID=CIGAR,Number=A,Type=String>
1345                "CIGAR",
1346                header::TagType::String,
1347                header::TagLength::AltAlleles,
1348            ),
1349        ];
1350        for (ref_name, ref_type, ref_length) in truth {
1351            let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1352            assert_eq!(tag_type, ref_type);
1353            assert_eq!(tag_length, ref_length);
1354        }
1355
1356        assert!(header.info_type(b"NOT_THERE").is_err());
1357    }
1358
1359    #[test]
1360    fn test_remove_alleles() {
1361        let mut bcf = Reader::from_path("test/test_multi.bcf").unwrap();
1362        for res in bcf.records() {
1363            let mut record = res.unwrap();
1364            if record.pos() == 10080 {
1365                record.remove_alleles(&[false, false, true]).unwrap();
1366                assert_eq!(record.alleles(), [b"A", b"C"]);
1367            }
1368        }
1369    }
1370
1371    // Helper function reading full file into string.
1372    fn read_all<P: AsRef<Path>>(path: P) -> String {
1373        let mut file = File::open(path.as_ref())
1374            .unwrap_or_else(|_| panic!("Unable to open the file: {:?}", path.as_ref()));
1375        let mut contents = String::new();
1376        file.read_to_string(&mut contents)
1377            .unwrap_or_else(|_| panic!("Unable to read the file: {:?}", path.as_ref()));
1378        contents
1379    }
1380
1381    // Open `test_various.vcf`, add a record from scratch to it and write it out again.
1382    //
1383    // This exercises the full functionality of updating information in a `record::Record`.
1384    #[test]
1385    fn test_write_various() {
1386        // Open reader, then create writer.
1387        let tmp = tempfile::Builder::new()
1388            .prefix("rust-htslib")
1389            .tempdir()
1390            .expect("Cannot create temp dir");
1391        let out_path = tmp.path().join("test_various.out.vcf");
1392
1393        let vcf = Reader::from_path("test/test_various.vcf").expect("Error opening file.");
1394        // The writer goes into its own block so we can ensure that the file is closed and
1395        // all data is written below.
1396        {
1397            let mut writer = Writer::from_path(
1398                &out_path,
1399                &Header::from_template(vcf.header()),
1400                true,
1401                Format::Vcf,
1402            )
1403            .expect("Error opening file.");
1404
1405            // Setup empty record, filled below.
1406            let mut record = writer.empty_record();
1407
1408            record.set_rid(Some(0));
1409            assert_eq!(record.rid().unwrap(), 0);
1410
1411            record.set_pos(12);
1412            assert_eq!(record.pos(), 12);
1413
1414            assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1415            record.set_id(b"to_be_cleared").unwrap();
1416            assert_eq!(
1417                str::from_utf8(record.id().as_ref()).unwrap(),
1418                "to_be_cleared"
1419            );
1420            record.clear_id().unwrap();
1421            assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1422            record.set_id(b"first_id").unwrap();
1423            record.push_id(b"second_id").unwrap();
1424            record.push_id(b"first_id").unwrap();
1425
1426            assert!(record.filters().next().is_none());
1427            record.set_filters(&["q10".as_bytes()]).unwrap();
1428            record.push_filter("s50".as_bytes()).unwrap();
1429            record.remove_filter("q10".as_bytes(), true).unwrap();
1430            record.push_filter("q10".as_bytes()).unwrap();
1431
1432            record.set_alleles(&[b"C", b"T", b"G"]).unwrap();
1433
1434            record.set_qual(10.0);
1435
1436            record.push_info_integer(b"N1", &[32]).unwrap();
1437            record.push_info_float(b"F1", &[33.0]).unwrap();
1438            record.push_info_string(b"S1", &[b"fourtytwo"]).unwrap();
1439            record.push_info_flag(b"X1").unwrap();
1440
1441            record
1442                .push_genotypes(&[
1443                    GenotypeAllele::Unphased(0),
1444                    GenotypeAllele::Unphased(1),
1445                    GenotypeAllele::Unphased(1),
1446                    GenotypeAllele::Phased(1),
1447                ])
1448                .unwrap();
1449
1450            record
1451                .push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
1452                .unwrap();
1453            record.push_format_integer(b"FF1", &[43, 11]).unwrap();
1454            record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
1455            record
1456                .push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
1457                .unwrap();
1458
1459            // Finally, write out the record.
1460            writer.write(&record).unwrap();
1461        }
1462
1463        // Now, compare expected and real output.
1464        let expected = read_all("test/test_various.out.vcf");
1465        let actual = read_all(&out_path);
1466        assert_eq!(expected, actual);
1467    }
1468
1469    #[test]
1470    fn test_remove_headers() {
1471        let vcf = Reader::from_path("test/test_headers.vcf").expect("Error opening file.");
1472        let tmp = tempfile::Builder::new()
1473            .prefix("rust-htslib")
1474            .tempdir()
1475            .expect("Cannot create temp dir");
1476        let vcfpath = tmp.path().join("test.vcf");
1477        let mut header = Header::from_template(&vcf.header);
1478        header
1479            .remove_contig(b"contig2")
1480            .remove_info(b"INFO2")
1481            .remove_format(b"FORMAT2")
1482            .remove_filter(b"FILTER2")
1483            .remove_structured(b"Foo2")
1484            .remove_generic(b"Bar2");
1485        {
1486            let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::Vcf)
1487                .expect("Error opening output file.");
1488            // Note that we don't need to write anything, we are just looking at the header.
1489        }
1490
1491        let expected = read_all("test/test_headers.out.vcf");
1492        let actual = read_all(&vcfpath);
1493        assert_eq!(expected, actual);
1494    }
1495
1496    #[test]
1497    fn test_synced_reader() {
1498        let mut reader = synced::SyncedReader::new().unwrap();
1499        reader.set_require_index(true);
1500        reader.set_pairing(synced::pairing::SNPS);
1501
1502        assert_eq!(reader.reader_count(), 0);
1503        reader.add_reader("test/test_left.vcf.gz").unwrap();
1504        reader.add_reader("test/test_right.vcf.gz").unwrap();
1505        assert_eq!(reader.reader_count(), 2);
1506
1507        let res1 = reader.read_next();
1508        assert_eq!(res1.unwrap(), 2);
1509        assert!(reader.has_line(0));
1510        assert!(reader.has_line(1));
1511
1512        let res2 = reader.read_next();
1513        assert_eq!(res2.unwrap(), 1);
1514        assert!(reader.has_line(0));
1515        assert!(!reader.has_line(1));
1516
1517        let res3 = reader.read_next();
1518        assert_eq!(res3.unwrap(), 1);
1519        assert!(!reader.has_line(0));
1520        assert!(reader.has_line(1));
1521
1522        let res4 = reader.read_next();
1523        assert_eq!(res4.unwrap(), 0);
1524    }
1525
1526    #[test]
1527    fn test_synced_reader_fetch() {
1528        let mut reader = synced::SyncedReader::new().unwrap();
1529        reader.set_require_index(true);
1530        reader.set_pairing(synced::pairing::SNPS);
1531
1532        assert_eq!(reader.reader_count(), 0);
1533        reader.add_reader("test/test_left.vcf.gz").unwrap();
1534        reader.add_reader("test/test_right.vcf.gz").unwrap();
1535        assert_eq!(reader.reader_count(), 2);
1536
1537        reader.fetch(0, 0, 1000).unwrap();
1538        let res1 = reader.read_next();
1539        assert_eq!(res1.unwrap(), 2);
1540        assert!(reader.has_line(0));
1541        assert!(reader.has_line(1));
1542
1543        let res2 = reader.read_next();
1544        assert_eq!(res2.unwrap(), 1);
1545        assert!(reader.has_line(0));
1546        assert!(!reader.has_line(1));
1547
1548        let res3 = reader.read_next();
1549        assert_eq!(res3.unwrap(), 1);
1550        assert!(!reader.has_line(0));
1551        assert!(reader.has_line(1));
1552
1553        let res4 = reader.read_next();
1554        assert_eq!(res4.unwrap(), 0);
1555    }
1556
1557    #[test]
1558    fn test_svlen() {
1559        let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
1560
1561        let mut record = reader.empty_record();
1562        reader.read(&mut record).unwrap().unwrap();
1563
1564        assert_eq!(
1565            *record.info(b"SVLEN").integer().unwrap().unwrap(),
1566            &[-127][..]
1567        );
1568    }
1569
1570    #[test]
1571    fn test_fails_on_bam() {
1572        let reader = Reader::from_path("test/test.bam");
1573        assert!(reader.is_err());
1574    }
1575
1576    #[test]
1577    fn test_fails_on_non_existiant() {
1578        let reader = Reader::from_path("test/no_such_file");
1579        assert!(reader.is_err());
1580    }
1581
1582    #[test]
1583    fn test_multi_string_info_tag() {
1584        let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
1585        let mut rec = reader.empty_record();
1586        let _ = reader.read(&mut rec);
1587
1588        assert_eq!(
1589            rec.info_shared_buffer(b"ANN", Buffer::new())
1590                .string()
1591                .unwrap()
1592                .unwrap()
1593                .len(),
1594            14
1595        );
1596    }
1597
1598    #[test]
1599    fn test_multi_string_info_tag_number_a() {
1600        let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
1601        let mut rec = reader.empty_record();
1602        let _ = reader.read(&mut rec);
1603
1604        assert_eq!(
1605            rec.info_shared_buffer(b"X", Buffer::new())
1606                .string()
1607                .unwrap()
1608                .unwrap()
1609                .len(),
1610            2
1611        );
1612    }
1613
1614    #[test]
1615    fn test_genotype_allele_conversion() {
1616        let allele = GenotypeAllele::Unphased(1);
1617        let converted: i32 = allele.into();
1618        let expected = 4;
1619        assert_eq!(converted, expected);
1620        let reverse_conversion = GenotypeAllele::from(expected);
1621        assert_eq!(allele, reverse_conversion);
1622    }
1623
1624    #[test]
1625    fn test_genotype_missing_allele_conversion() {
1626        let allele = GenotypeAllele::PhasedMissing;
1627        let converted: i32 = allele.into();
1628        let expected = 1;
1629        assert_eq!(converted, expected);
1630        let reverse_conversion = GenotypeAllele::from(expected);
1631        assert_eq!(allele, reverse_conversion);
1632    }
1633
1634    #[test]
1635    fn test_alt_allele_dosage() {
1636        let path = &"test/test_string.vcf";
1637        let mut bcf = Reader::from_path(path).expect("Error opening file.");
1638        let _header = bcf.header();
1639        // FORMAT fields of first record of the vcf should look like:
1640        // GT:FS1:FN1	./1:LongString1:1	1/1:ss1:2
1641        let first_record = bcf.records().next().unwrap().expect("Fail to read record");
1642        let sample_count = usize::try_from(first_record.sample_count()).unwrap();
1643        assert_eq!(sample_count, 2);
1644        let mut n_ref = vec![0; sample_count];
1645        let mut n_alt = vec![0; sample_count];
1646        let mut n_missing = vec![0; sample_count];
1647        let gts = first_record.genotypes().expect("Error reading genotypes");
1648        for sample_index in 0..sample_count {
1649            // for each sample
1650            for gta in gts.get(sample_index).iter() {
1651                // for each allele
1652                match gta.index() {
1653                    Some(0) => n_ref[sample_index] += 1,  // reference allele
1654                    Some(_) => n_alt[sample_index] += 1,  // alt allele
1655                    None => n_missing[sample_index] += 1, // missing allele
1656                }
1657            }
1658        }
1659        assert_eq!(n_ref, [0, 0]);
1660        assert_eq!(n_alt, [1, 2]);
1661        assert_eq!(n_missing, [1, 0]);
1662    }
1663
1664    #[test]
1665    fn test_obs_cornercase() {
1666        let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1667        let first_record = reader
1668            .records()
1669            .next()
1670            .unwrap()
1671            .expect("Fail to read record");
1672
1673        assert_eq!(
1674            *first_record.info(b"EVENT").string().unwrap().unwrap(),
1675            [b"gridss33fb_1085"]
1676        );
1677        assert_eq!(
1678            *first_record.info(b"MATEID").string().unwrap().unwrap(),
1679            [b"gridss33fb_1085h"]
1680        );
1681    }
1682
1683    #[test]
1684    fn test_trailing_omitted_format_fields() {
1685        let mut reader = Reader::from_path("test/test_trailing_omitted_format.vcf").unwrap();
1686        let first_record = reader
1687            .records()
1688            .next()
1689            .unwrap()
1690            .expect("Fail to read record");
1691
1692        let expected: Vec<&[u8]> = Vec::new();
1693        assert_eq!(*first_record.format(b"STR").string().unwrap(), expected,);
1694        assert_eq!(
1695            *first_record.format(b"INT").integer().unwrap(),
1696            vec![&[i32::missing()]],
1697        );
1698        assert!(first_record.format(b"FLT").float().unwrap()[0][0].is_nan(),);
1699    }
1700
1701    // #[test]
1702    // fn test_buffer_lifetime() {
1703    //     let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1704    //     let first_record = reader
1705    //         .records()
1706    //         .next()
1707    //         .unwrap()
1708    //         .expect("Fail to read record");
1709
1710    //     fn get_value<'a, 'b>(record: &'a Record) -> &'b [u8] {
1711    //         // FIXME: this should not be possible, because the slice outlives the buffer.
1712    //         let buffer: BufferBacked<'b, _, _> = record.info(b"EVENT").string().unwrap().unwrap();
1713    //         let value: &'b [u8] = buffer[0];
1714    //         value
1715    //     }
1716
1717    //     let buffered = first_record.info(b"EVENT").string().unwrap().unwrap();
1718    //     assert_eq!(get_value(&first_record), buffered[0]);
1719    // }
1720}