rust_htslib/bcf/
mod.rs

1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Module for working with VCF and BCF files.
7//!
8//! # Performance Remarks
9//!
10//! Note that BCF corresponds to the in-memory representation of BCF/VCF records in Htslib
11//! itself. Thus, it comes without a runtime penalty for parsing, in contrast to reading VCF
12//! files.
13//!
14//! # Example (reading)
15//!
16//!   - Obtaining 0-based locus index of the VCF record.
17//!   - Obtaining alleles of the VCF record.
18//!   - calculate alt-allele dosage in a mutli-sample VCF / BCF
19//!
20//! ```
21//! use crate::rust_htslib::bcf::{Reader, Read};
22//! use std::convert::TryFrom;
23//!
24//! let path = &"test/test_string.vcf";
25//! let mut bcf = Reader::from_path(path).expect("Error opening file.");
26//! // iterate through each row of the vcf body.
27//! for (i, record_result) in bcf.records().enumerate() {
28//!     let mut record = record_result.expect("Fail to read record");
29//!     let mut s = String::new();
30//!      for allele in record.alleles() {
31//!          for c in allele {
32//!              s.push(char::from(*c))
33//!          }
34//!          s.push(' ')
35//!      }
36//!     // 0-based position and the list of alleles
37//!     println!("Locus: {}, Alleles: {}", record.pos(), s);
38//!     // number of sample in the vcf
39//!     let sample_count = usize::try_from(record.sample_count()).unwrap();
40//!
41//!     // Counting ref, alt and missing alleles for each sample
42//!     let mut n_ref = vec![0; sample_count];
43//!     let mut n_alt = vec![0; sample_count];
44//!     let mut n_missing = vec![0; sample_count];
45//!     let gts = record.genotypes().expect("Error reading genotypes");
46//!     for sample_index in 0..sample_count {
47//!         // for each sample
48//!         for gta in gts.get(sample_index).iter() {
49//!             // for each allele
50//!             match gta.index() {
51//!                 Some(0) => n_ref[sample_index] += 1,  // reference allele
52//!                 Some(_) => n_alt[sample_index] += 1,  // alt allele
53//!                 None => n_missing[sample_index] += 1, // missing allele
54//!             }
55//!         }
56//!     }
57//! }
58//! ```
59//!
60//! # Example (writing)
61//!
62//!   - Setting up a VCF writer from scratch (including a simple header)
63//!   - Creating a VCF record and writing it to the VCF file
64//!
65//! ```
66//! use rust_htslib::bcf::{Format, Writer};
67//! use rust_htslib::bcf::header::Header;
68//! use rust_htslib::bcf::record::GenotypeAllele;
69//!
70//! // Create minimal VCF header with a single contig and a single sample
71//! let mut header = Header::new();
72//! let header_contig_line = r#"##contig=<ID=1,length=10>"#;
73//! header.push_record(header_contig_line.as_bytes());
74//! let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
75//! header.push_record(header_gt_line.as_bytes());
76//! header.push_sample("test_sample".as_bytes());
77//!
78//! // Write uncompressed VCF to stdout with above header and get an empty record
79//! let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
80//! let mut record = vcf.empty_record();
81//!
82//! // Set chrom and pos to 1 and 7, respectively - note the 0-based positions
83//! let rid = vcf.header().name2rid(b"1").unwrap();
84//! record.set_rid(Some(rid));
85//! record.set_pos(6);
86//!
87//! // Set record genotype to 0|1 - note first allele is always unphased
88//! let alleles = &[GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)];
89//! record.push_genotypes(alleles).unwrap();
90//!
91//! // Write record
92//! vcf.write(&record).unwrap()
93//! ```
94//!
95//! This will print the following VCF to stdout:
96//!
97//! ```lang-none
98//! ##fileformat=VCFv4.2
99//! ##FILTER=<ID=PASS,Description="All filters passed">
100//! ##contig=<ID=1,length=10>
101//! ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
102//! #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  test_sample
103//! 1       7       .       .       .       0       .       .       GT      0|1
104//! ```
105
106use std::ffi;
107use std::path::Path;
108use std::rc::Rc;
109use std::str;
110
111use url::Url;
112
113pub mod buffer;
114pub mod header;
115pub mod index;
116pub mod record;
117
118use crate::bcf::header::{HeaderView, SampleSubset};
119use crate::errors::{Error, Result};
120use crate::htslib;
121
122pub use crate::bcf::header::{Header, HeaderRecord};
123pub use crate::bcf::record::Record;
124
125/// A trait for a BCF reader with a read method.
126pub trait Read: Sized {
127    /// Read the next record.
128    ///
129    /// # Arguments
130    /// * record - an empty record, that can be created with `bcf::Reader::empty_record`.
131    ///
132    /// # Returns
133    /// None if end of file was reached, otherwise Some will contain
134    /// a result with an error in case of failure.
135    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
136
137    /// Return an iterator over all records of the VCF/BCF file.
138    fn records(&mut self) -> Records<'_, Self>;
139
140    /// Return the header.
141    fn header(&self) -> &HeaderView;
142
143    /// Return empty record.  Can be reused multiple times.
144    fn empty_record(&self) -> Record;
145
146    /// Activate multi-threaded BCF/VCF read support in htslib. This should permit faster
147    /// reading of large VCF files.
148    ///
149    /// Setting `nthreads` to `0` does not change the current state.  Note that it is not
150    /// possible to set the number of background threads below `1` once it has been set.
151    ///
152    /// # Arguments
153    ///
154    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
155    fn set_threads(&mut self, n_threads: usize) -> Result<()>;
156}
157
158/// A VCF/BCF reader.
159#[derive(Debug)]
160pub struct Reader {
161    inner: *mut htslib::htsFile,
162    header: Rc<HeaderView>,
163}
164
165unsafe impl Send for Reader {}
166/// # Safety
167///
168/// Implementation for `Reader::set_threads()` and `Writer::set_threads`.
169pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
170    assert!(n_threads > 0, "n_threads must be > 0");
171
172    let r = htslib::hts_set_threads(hts_file, n_threads as i32);
173    if r != 0 {
174        Err(Error::SetThreads)
175    } else {
176        Ok(())
177    }
178}
179
180impl Reader {
181    /// Create a new reader from a given path.
182    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
183        match path.as_ref().to_str() {
184            Some(p) if !path.as_ref().exists() => Err(Error::FileNotFound { path: p.into() }),
185            Some(p) => Self::new(p.as_bytes()),
186            _ => Err(Error::NonUnicodePath),
187        }
188    }
189
190    /// Create a new reader from a given URL.
191    pub fn from_url(url: &Url) -> Result<Self> {
192        Self::new(url.as_str().as_bytes())
193    }
194
195    /// Create a new reader from standard input.
196    pub fn from_stdin() -> Result<Self> {
197        Self::new(b"-")
198    }
199
200    fn new(path: &[u8]) -> Result<Self> {
201        let htsfile = bcf_open(path, b"r")?;
202        let header = unsafe { htslib::bcf_hdr_read(htsfile) };
203        Ok(Reader {
204            inner: htsfile,
205            header: Rc::new(HeaderView::new(header)),
206        })
207    }
208}
209
210impl Read for Reader {
211    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
212        match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
213            0 => {
214                unsafe {
215                    // Always unpack record.
216                    htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
217                }
218                record.set_header(Rc::clone(&self.header));
219                Some(Ok(()))
220            }
221            -1 => None,
222            _ => Some(Err(Error::BcfInvalidRecord)),
223        }
224    }
225
226    fn records(&mut self) -> Records<'_, Self> {
227        Records { reader: self }
228    }
229
230    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
231        unsafe { set_threads(self.inner, n_threads) }
232    }
233
234    fn header(&self) -> &HeaderView {
235        &self.header
236    }
237
238    /// Return empty record.  Can be reused multiple times.
239    fn empty_record(&self) -> Record {
240        self.header.empty_record()
241    }
242}
243
244impl Drop for Reader {
245    fn drop(&mut self) {
246        unsafe {
247            htslib::hts_close(self.inner);
248        }
249    }
250}
251
252/// An indexed VCF/BCF reader.
253#[derive(Debug)]
254pub struct IndexedReader {
255    /// The synced VCF/BCF reader to use internally.
256    inner: *mut htslib::bcf_srs_t,
257    /// The header.
258    header: Rc<HeaderView>,
259
260    /// The position of the previous fetch, if any.
261    current_region: Option<(u32, u64, Option<u64>)>,
262}
263
264unsafe impl Send for IndexedReader {}
265
266impl IndexedReader {
267    /// Create a new `IndexedReader` from path.
268    ///
269    /// # Arguments
270    ///
271    /// * `path` - the path to open.
272    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
273        let path = path.as_ref();
274        match path.to_str() {
275            Some(p) if path.exists() => {
276                Self::new(&ffi::CString::new(p).map_err(|_| Error::NonUnicodePath)?)
277            }
278            Some(p) => Err(Error::FileNotFound { path: p.into() }),
279            None => Err(Error::NonUnicodePath),
280        }
281    }
282
283    /// Create a new `IndexedReader` from an URL.
284    pub fn from_url(url: &Url) -> Result<Self> {
285        Self::new(&ffi::CString::new(url.as_str()).unwrap())
286    }
287
288    /// Create a new `IndexedReader`.
289    ///
290    /// # Arguments
291    ///
292    /// * `path` - the path. Use "-" for stdin.
293    fn new(path: &ffi::CStr) -> Result<Self> {
294        // Create reader and require existence of index file.
295        let ser_reader = unsafe { htslib::bcf_sr_init() };
296        unsafe {
297            htslib::bcf_sr_set_opt(ser_reader, 0);
298        } // 0: BCF_SR_REQUIRE_IDX
299          // Attach a file with the path from the arguments.
300        if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
301            let header = Rc::new(HeaderView::new(unsafe {
302                htslib::bcf_hdr_dup((*(*ser_reader).readers.offset(0)).header)
303            }));
304            Ok(IndexedReader {
305                inner: ser_reader,
306                header,
307                current_region: None,
308            })
309        } else {
310            Err(Error::BcfOpen {
311                target: path.to_str().unwrap().to_owned(),
312            })
313        }
314    }
315
316    /// Jump to the given region.
317    ///
318    /// # Arguments
319    ///
320    /// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
321    ///   contig name to ID.
322    /// * `start` - `0`-based **inclusive** start coordinate of region on reference.
323    /// * `end` - Optional `0`-based **inclusive** end coordinate of region on reference. If `None`
324    ///   is given, records are fetched from `start` until the end of the contig.
325    ///
326    /// # Note
327    /// The entire contig can be fetched by setting `start` to `0` and `end` to `None`.
328    pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
329        let contig = self.header.rid2name(rid)?;
330        let contig = ffi::CString::new(contig).unwrap();
331        if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
332            Err(Error::GenomicSeek {
333                contig: contig.to_str().unwrap().to_owned(),
334                start,
335            })
336        } else {
337            self.current_region = Some((rid, start, end));
338            Ok(())
339        }
340    }
341}
342
343impl Read for IndexedReader {
344    fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
345        match unsafe { htslib::bcf_sr_next_line(self.inner) } {
346            0 => {
347                if unsafe { (*self.inner).errnum } != 0 {
348                    Some(Err(Error::BcfInvalidRecord))
349                } else {
350                    None
351                }
352            }
353            i => {
354                assert!(i > 0, "Must not be negative");
355                // Note that the sync BCF reader has a different interface than the others
356                // as it keeps its own buffer already for each record.  An alternative here
357                // would be to replace the `inner` value by an enum that can be a pointer
358                // into a synced reader or an owning popinter to an allocated record.
359                unsafe {
360                    htslib::bcf_copy(
361                        record.inner,
362                        *(*(*self.inner).readers.offset(0)).buffer.offset(0),
363                    );
364                }
365
366                unsafe {
367                    // Always unpack record.
368                    htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
369                }
370
371                record.set_header(Rc::clone(&self.header));
372
373                match self.current_region {
374                    Some((rid, _start, end)) => {
375                        let endpos = match end {
376                            Some(e) => e,
377                            None => u64::MAX,
378                        };
379                        if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
380                            Some(Ok(()))
381                        } else {
382                            None
383                        }
384                    }
385                    None => Some(Ok(())),
386                }
387            }
388        }
389    }
390
391    fn records(&mut self) -> Records<'_, Self> {
392        Records { reader: self }
393    }
394
395    fn set_threads(&mut self, n_threads: usize) -> Result<()> {
396        assert!(n_threads > 0, "n_threads must be > 0");
397
398        let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
399        if r != 0 {
400            Err(Error::SetThreads)
401        } else {
402            Ok(())
403        }
404    }
405
406    fn header(&self) -> &HeaderView {
407        &self.header
408    }
409
410    fn empty_record(&self) -> Record {
411        Record::new(Rc::clone(&self.header))
412    }
413}
414
415impl Drop for IndexedReader {
416    fn drop(&mut self) {
417        unsafe { htslib::bcf_sr_destroy(self.inner) };
418    }
419}
420
421/// This module contains the `SyncedReader` class and related code.
422pub mod synced {
423
424    use super::*;
425
426    /// This module contains bitmask constants for `SyncedReader`.
427    pub mod pairing {
428        /// Allow different alleles, as long as they all are SNPs.
429        pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
430        /// The same as above, but with indels.
431        pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
432        /// Any combination of alleles can be returned by `bcf_sr_next_line()`.
433        pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
434        /// At least some of multiallelic ALTs must match.  Implied by all the others with the exception of `EXACT`.
435        pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
436        /// Allow REF-only records with SNPs.
437        pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
438        /// Allow REF-only records with indels.
439        pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
440        /// Require the exact same set of alleles in all files.
441        pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
442        /// `SNPS | INDELS`.
443        pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
444        /// `SNPS | INDELS | SNP_REF | INDEL_REF`.
445        pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
446    }
447
448    /// A wrapper for `bcf_srs_t`; allows joint traversal of multiple VCF and/or BCF files.
449    #[derive(Debug)]
450    pub struct SyncedReader {
451        /// Internal handle for the synced reader.
452        inner: *mut crate::htslib::bcf_srs_t,
453
454        /// RC's of `HeaderView`s of the readers.
455        headers: Vec<Rc<HeaderView>>,
456
457        /// The position of the previous fetch, if any.
458        current_region: Option<(u32, u64, u64)>,
459    }
460
461    // TODO: add interface for setting threads, ensure that the pool is freed properly
462    impl SyncedReader {
463        pub fn new() -> Result<Self> {
464            let inner = unsafe { crate::htslib::bcf_sr_init() };
465            if inner.is_null() {
466                return Err(Error::BcfAllocationError);
467            }
468
469            Ok(SyncedReader {
470                inner,
471                headers: Vec::new(),
472                current_region: None,
473            })
474        }
475
476        /// Enable or disable requiring of index
477        pub fn set_require_index(&mut self, do_require: bool) {
478            unsafe {
479                (*self.inner).require_index = if do_require { 1 } else { 0 };
480            }
481        }
482
483        /// Set the given bitmask of values from `sr_pairing` module.
484        pub fn set_pairing(&mut self, bitmask: u32) {
485            unsafe {
486                // TODO: 1 actually is BCF_SR_PAIR_LOGIC but is not available here?
487                crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
488            }
489        }
490
491        /// Add new reader with the path to the file.
492        pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
493            match path.as_ref().to_str() {
494                Some(p) if path.as_ref().exists() => {
495                    let p_cstring = ffi::CString::new(p).unwrap();
496                    let res =
497                        unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
498
499                    if res == 0 {
500                        return Err(Error::BcfOpen {
501                            target: p.to_owned(),
502                        });
503                    }
504
505                    let i = (self.reader_count() - 1) as isize;
506                    let header = Rc::new(HeaderView::new(unsafe {
507                        crate::htslib::bcf_hdr_dup((*(*self.inner).readers.offset(i)).header)
508                    }));
509                    self.headers.push(header);
510                    Ok(())
511                }
512                _ => Err(Error::NonUnicodePath),
513            }
514        }
515
516        /// Remove reader with the given index.
517        pub fn remove_reader(&mut self, idx: u32) {
518            if idx >= self.reader_count() {
519                panic!("Invalid reader!");
520            } else {
521                unsafe {
522                    crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
523                }
524                self.headers.remove(idx as usize);
525            }
526        }
527
528        /// Return number of open files/readers.
529        pub fn reader_count(&self) -> u32 {
530            unsafe { (*self.inner).nreaders as u32 }
531        }
532
533        /// Read next line and return number of readers that have the given line (0 if end of all files is reached).
534        pub fn read_next(&mut self) -> Result<u32> {
535            let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
536
537            if num == 0 {
538                if unsafe { (*self.inner).errnum } != 0 {
539                    return Err(Error::BcfInvalidRecord);
540                }
541                Ok(0)
542            } else {
543                assert!(num > 0, "num returned by htslib must not be negative");
544                match self.current_region {
545                    Some((rid, _start, end)) => {
546                        for idx in 0..self.reader_count() {
547                            if !self.has_line(idx) {
548                                continue;
549                            }
550                            unsafe {
551                                let record = *(*(*self.inner).readers.offset(idx as isize))
552                                    .buffer
553                                    .offset(0);
554                                if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
555                                    return Ok(0);
556                                }
557                            }
558                        }
559                        Ok(num)
560                    }
561                    None => Ok(num),
562                }
563            }
564        }
565
566        /// Return whether the given reader has the line.
567        pub fn has_line(&self, idx: u32) -> bool {
568            if idx >= self.reader_count() {
569                panic!("Invalid reader!");
570            } else {
571                unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
572            }
573        }
574
575        /// Return record from the given reader, if any.
576        pub fn record(&self, idx: u32) -> Option<Record> {
577            if self.has_line(idx) {
578                let record = Record::new(self.headers[idx as usize].clone());
579                unsafe {
580                    crate::htslib::bcf_copy(
581                        record.inner,
582                        *(*(*self.inner).readers.offset(idx as isize))
583                            .buffer
584                            .offset(0),
585                    );
586                }
587                Some(record)
588            } else {
589                None
590            }
591        }
592
593        /// Return header from the given reader.
594        pub fn header(&self, idx: u32) -> &HeaderView {
595            // TODO: is the mutability here correct?
596            if idx >= self.reader_count() {
597                panic!("Invalid reader!");
598            } else {
599                &self.headers[idx as usize]
600            }
601        }
602
603        /// Jump to the given region.
604        ///
605        /// # Arguments
606        ///
607        /// * `rid` - numeric ID of the reference to jump to; use `HeaderView::name2rid` for resolving
608        ///   contig name to ID.
609        /// * `start` - `0`-based start coordinate of region on reference.
610        /// * `end` - `0`-based end coordinate of region on reference.
611        pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
612            let contig = {
613                let contig = self.header(0).rid2name(rid).unwrap(); //.clone();
614                ffi::CString::new(contig).unwrap()
615            };
616            if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
617                Err(Error::GenomicSeek {
618                    contig: contig.to_str().unwrap().to_owned(),
619                    start,
620                })
621            } else {
622                self.current_region = Some((rid, start, end));
623                Ok(())
624            }
625        }
626    }
627
628    impl Drop for SyncedReader {
629        fn drop(&mut self) {
630            unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
631        }
632    }
633}
634
635#[derive(Clone, Copy, Debug, PartialEq, Eq)]
636pub enum Format {
637    Vcf,
638    Bcf,
639}
640
641/// A VCF/BCF writer.
642#[derive(Debug)]
643pub struct Writer {
644    inner: *mut htslib::htsFile,
645    header: Rc<HeaderView>,
646    subset: Option<SampleSubset>,
647}
648
649unsafe impl Send for Writer {}
650
651impl Writer {
652    /// Create a new writer that writes to the given path.
653    ///
654    /// # Arguments
655    ///
656    /// * `path` - the path
657    /// * `header` - header definition to use
658    /// * `uncompressed` - disable compression
659    /// * `vcf` - write VCF instead of BCF
660    pub fn from_path<P: AsRef<Path>>(
661        path: P,
662        header: &Header,
663        uncompressed: bool,
664        format: Format,
665    ) -> Result<Self> {
666        if let Some(p) = path.as_ref().to_str() {
667            Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
668        } else {
669            Err(Error::NonUnicodePath)
670        }
671    }
672
673    /// Create a new writer from a URL.
674    ///
675    /// # Arguments
676    ///
677    /// * `url` - the URL
678    /// * `header` - header definition to use
679    /// * `uncompressed` - disable compression
680    /// * `vcf` - write VCF instead of BCF
681    pub fn from_url(
682        url: &Url,
683        header: &Header,
684        uncompressed: bool,
685        format: Format,
686    ) -> Result<Self> {
687        Self::new(url.as_str().as_bytes(), header, uncompressed, format)
688    }
689
690    /// Create a new writer to stdout.
691    ///
692    /// # Arguments
693    ///
694    /// * `header` - header definition to use
695    /// * `uncompressed` - disable compression
696    /// * `vcf` - write VCF instead of BCF
697    pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
698        Self::new(b"-", header, uncompressed, format)
699    }
700
701    fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
702        let mode: &[u8] = match (uncompressed, format) {
703            (true, Format::Vcf) => b"w",
704            (false, Format::Vcf) => b"wz",
705            (true, Format::Bcf) => b"wbu",
706            (false, Format::Bcf) => b"wb",
707        };
708
709        let htsfile = bcf_open(path, mode)?;
710        unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
711        Ok(Writer {
712            inner: htsfile,
713            header: Rc::new(HeaderView::new(unsafe {
714                htslib::bcf_hdr_dup(header.inner)
715            })),
716            subset: header.subset.clone(),
717        })
718    }
719
720    /// Obtain reference to the lightweight `HeaderView` of the BCF header.
721    pub fn header(&self) -> &HeaderView {
722        &self.header
723    }
724
725    /// Create empty record for writing to this writer.
726    ///
727    /// This record can then be reused multiple times.
728    pub fn empty_record(&self) -> Record {
729        record::Record::new(Rc::clone(&self.header))
730    }
731
732    /// Translate record to header of this writer.
733    ///
734    /// # Arguments
735    ///
736    /// - `record` - The `Record` to translate.
737    pub fn translate(&mut self, record: &mut record::Record) {
738        unsafe {
739            htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
740        }
741        record.set_header(Rc::clone(&self.header));
742    }
743
744    /// Subset samples of record to match header of this writer.
745    ///
746    /// # Arguments
747    ///
748    /// - `record` - The `Record` to modify.
749    pub fn subset(&mut self, record: &mut record::Record) {
750        if let Some(ref mut subset) = self.subset {
751            unsafe {
752                htslib::bcf_subset(
753                    self.header.inner,
754                    record.inner,
755                    subset.len() as i32,
756                    subset.as_mut_ptr(),
757                );
758            }
759        }
760    }
761
762    /// Write `record` to the Writer.
763    ///
764    /// # Arguments
765    ///
766    /// - `record` - The `Record` to write.
767    pub fn write(&mut self, record: &record::Record) -> Result<()> {
768        if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
769            Err(Error::WriteRecord)
770        } else {
771            Ok(())
772        }
773    }
774
775    /// Activate multi-threaded BCF write support in htslib. This should permit faster
776    /// writing of large BCF files.
777    ///
778    /// # Arguments
779    ///
780    /// * `n_threads` - number of extra background writer threads to use, must be `> 0`.
781    pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
782        unsafe { set_threads(self.inner, n_threads) }
783    }
784}
785
786impl Drop for Writer {
787    fn drop(&mut self) {
788        unsafe {
789            htslib::hts_close(self.inner);
790        }
791    }
792}
793
794#[derive(Debug)]
795pub struct Records<'a, R: Read> {
796    reader: &'a mut R,
797}
798
799impl<R: Read> Iterator for Records<'_, R> {
800    type Item = Result<record::Record>;
801
802    fn next(&mut self) -> Option<Result<record::Record>> {
803        let mut record = self.reader.empty_record();
804        match self.reader.read(&mut record) {
805            Some(Err(e)) => Some(Err(e)),
806            Some(Ok(_)) => Some(Ok(record)),
807            None => None,
808        }
809    }
810}
811
812/// Wrapper for opening a BCF file.
813fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
814    let p = ffi::CString::new(target).unwrap();
815    let c_str = ffi::CString::new(mode).unwrap();
816    let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
817
818    if ret.is_null() {
819        return Err(Error::BcfOpen {
820            target: str::from_utf8(target).unwrap().to_owned(),
821        });
822    }
823
824    unsafe {
825        if !(mode.contains(&b'w')
826            || (*ret).format.category == htslib::htsFormatCategory_variant_data)
827        {
828            return Err(Error::BcfOpen {
829                target: str::from_utf8(target).unwrap().to_owned(),
830            });
831        }
832    }
833    Ok(ret)
834}
835
836#[cfg(test)]
837mod tests {
838    use tempfile::NamedTempFile;
839
840    use super::record::Buffer;
841    use super::*;
842    use crate::bcf::header::Id;
843    use crate::bcf::record::GenotypeAllele;
844    use crate::bcf::record::Numeric;
845    use crate::bcf::Reader;
846    use std::convert::TryFrom;
847    use std::fs::File;
848    use std::io::prelude::Read as IoRead;
849    use std::path::Path;
850    use std::str;
851
852    fn _test_read<P: AsRef<Path>>(path: &P) {
853        let mut bcf = Reader::from_path(path).expect("Error opening file.");
854        assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
855
856        for (i, rec) in bcf.records().enumerate() {
857            let record = rec.expect("Error reading record.");
858            assert_eq!(record.sample_count(), 1);
859
860            assert_eq!(record.rid().expect("Error reading rid."), 0);
861            assert_eq!(record.pos(), 10021 + i as i64);
862            assert!((record.qual() - 0f32).abs() < f32::EPSILON);
863            let mut buffer = Buffer::new();
864            assert!(
865                (record
866                    .info_shared_buffer(b"MQ0F", &mut buffer)
867                    .float()
868                    .expect("Error reading info.")
869                    .expect("Missing tag")[0]
870                    - 1.0)
871                    .abs()
872                    < f32::EPSILON
873            );
874            if i == 59 {
875                assert!(
876                    (record
877                        .info_shared_buffer(b"SGB", &mut buffer)
878                        .float()
879                        .expect("Error reading info.")
880                        .expect("Missing tag")[0]
881                        - -0.379885)
882                        .abs()
883                        < f32::EPSILON
884                );
885            }
886            // the artificial "not observed" allele is present in each record.
887            assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
888
889            let fmt = record.format(b"PL");
890            let pl = fmt.integer().expect("Error reading format.");
891            assert_eq!(pl.len(), 1);
892            if i == 59 {
893                assert_eq!(pl[0].len(), 6);
894            } else {
895                assert_eq!(pl[0].len(), 3);
896            }
897        }
898    }
899
900    #[test]
901    fn test_read() {
902        _test_read(&"test/test.bcf");
903    }
904
905    #[test]
906    fn test_reader_set_threads() {
907        let path = &"test/test.bcf";
908        let mut bcf = Reader::from_path(path).expect("Error opening file.");
909        bcf.set_threads(2).unwrap();
910    }
911
912    #[test]
913    fn test_writer_set_threads() {
914        let path = &"test/test.bcf";
915        let tmp = tempfile::Builder::new()
916            .prefix("rust-htslib")
917            .tempdir()
918            .expect("Cannot create temp dir");
919        let bcfpath = tmp.path().join("test.bcf");
920        let bcf = Reader::from_path(path).expect("Error opening file.");
921        let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
922            .expect("Error subsetting samples.");
923        let mut writer =
924            Writer::from_path(&bcfpath, &header, false, Format::Bcf).expect("Error opening file.");
925        writer.set_threads(2).unwrap();
926    }
927
928    #[test]
929    fn test_fetch() {
930        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
931        bcf.set_threads(2).unwrap();
932        let rid = bcf
933            .header()
934            .name2rid(b"1")
935            .expect("Translating from contig '1' to ID failed.");
936        bcf.fetch(rid, 10_033, Some(10_060))
937            .expect("Fetching failed");
938        assert_eq!(bcf.records().count(), 28);
939    }
940
941    #[test]
942    fn test_fetch_all() {
943        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
944        bcf.set_threads(2).unwrap();
945        let rid = bcf
946            .header()
947            .name2rid(b"1")
948            .expect("Translating from contig '1' to ID failed.");
949        bcf.fetch(rid, 0, None).expect("Fetching failed");
950        assert_eq!(bcf.records().count(), 62);
951    }
952
953    #[test]
954    fn test_fetch_open_ended() {
955        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
956        bcf.set_threads(2).unwrap();
957        let rid = bcf
958            .header()
959            .name2rid(b"1")
960            .expect("Translating from contig '1' to ID failed.");
961        bcf.fetch(rid, 10077, None).expect("Fetching failed");
962        assert_eq!(bcf.records().count(), 6);
963    }
964
965    #[test]
966    fn test_fetch_start_greater_than_last_vcf_pos() {
967        let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
968        bcf.set_threads(2).unwrap();
969        let rid = bcf
970            .header()
971            .name2rid(b"1")
972            .expect("Translating from contig '1' to ID failed.");
973        bcf.fetch(rid, 20077, None).expect("Fetching failed");
974        assert_eq!(bcf.records().count(), 0);
975    }
976
977    #[test]
978    fn test_write() {
979        let mut bcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
980        let tmp = tempfile::Builder::new()
981            .prefix("rust-htslib")
982            .tempdir()
983            .expect("Cannot create temp dir");
984        let bcfpath = tmp.path().join("test.bcf");
985        println!("{:?}", bcfpath);
986        {
987            let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
988                .expect("Error subsetting samples.");
989            let mut writer = Writer::from_path(&bcfpath, &header, false, Format::Bcf)
990                .expect("Error opening file.");
991            for rec in bcf.records() {
992                let mut record = rec.expect("Error reading record.");
993                writer.translate(&mut record);
994                writer.subset(&mut record);
995                record.trim_alleles().expect("Error trimming alleles.");
996                writer.write(&record).expect("Error writing record");
997            }
998        }
999        {
1000            _test_read(&bcfpath);
1001        }
1002        tmp.close().expect("Failed to delete temp dir");
1003    }
1004
1005    #[test]
1006    fn test_strings() {
1007        let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1008        let fs1 = [
1009            &b"LongString1"[..],
1010            &b"LongString2"[..],
1011            &b"."[..],
1012            &b"LongString4"[..],
1013            &b"evenlength"[..],
1014            &b"ss6"[..],
1015        ];
1016        let mut buffer = Buffer::new();
1017        for (i, rec) in vcf.records().enumerate() {
1018            println!("record {}", i);
1019            let record = rec.expect("Error reading record.");
1020            assert_eq!(
1021                record
1022                    .info_shared_buffer(b"S1", &mut buffer)
1023                    .string()
1024                    .expect("Error reading string.")
1025                    .expect("Missing tag")[0],
1026                format!("string{}", i + 1).as_bytes()
1027            );
1028            let fs1_str_vec = record
1029                .format_shared_buffer(b"FS1", &mut buffer)
1030                .string()
1031                .expect("Error reading string.");
1032            assert_eq!(fs1_str_vec.len(), 2);
1033            println!("{}", String::from_utf8_lossy(fs1_str_vec[0]));
1034            assert_eq!(
1035                record
1036                    .format(b"FS1")
1037                    .string()
1038                    .expect("Error reading string.")[0],
1039                fs1[i]
1040            );
1041        }
1042    }
1043
1044    #[test]
1045    fn test_missing() {
1046        let mut vcf = Reader::from_path("test/test_missing.vcf").expect("Error opening file.");
1047        let fn4 = [
1048            &[
1049                i32::missing(),
1050                i32::missing(),
1051                i32::missing(),
1052                i32::missing(),
1053            ][..],
1054            &[i32::missing()][..],
1055        ];
1056        let f1 = [false, true];
1057        let mut buffer = Buffer::new();
1058        for (i, rec) in vcf.records().enumerate() {
1059            let record = rec.expect("Error reading record.");
1060            assert_eq!(
1061                record
1062                    .info_shared_buffer(b"F1", &mut buffer)
1063                    .float()
1064                    .expect("Error reading float.")
1065                    .expect("Missing tag")[0]
1066                    .is_nan(),
1067                f1[i]
1068            );
1069            assert_eq!(
1070                record
1071                    .format(b"FN4")
1072                    .integer()
1073                    .expect("Error reading integer.")[1],
1074                fn4[i]
1075            );
1076            assert!(
1077                record.format(b"FF4").float().expect("Error reading float.")[1]
1078                    .iter()
1079                    .all(|&v| v.is_missing())
1080            );
1081        }
1082    }
1083
1084    #[test]
1085    fn test_genotypes() {
1086        let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1087        let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
1088        for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
1089            let rec = rec.expect("Error reading record.");
1090            let genotypes = rec.genotypes().expect("Error reading genotypes");
1091            assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
1092        }
1093    }
1094
1095    #[test]
1096    fn test_genotypes_read_mixed_ploidy() {
1097        let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1098
1099        // Expected genotypes for comparison
1100        let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1101
1102        for (rec, exp_gts) in vcf.records().zip(expected.iter()) {
1103            let rec = rec.expect("Error reading record.");
1104
1105            // Get the genotypes from the record
1106            let genotypes = rec.genotypes().expect("Error reading genotypes");
1107
1108            // Compare each genotype with the expected value
1109            for (sample, exp_gt) in exp_gts.iter().enumerate() {
1110                assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1111            }
1112        }
1113    }
1114
1115    #[test]
1116    fn test_genotypes_write_and_read_mixed_ploidy() {
1117        let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1118
1119        // Create a temporary file to write the modified VCF data
1120        let tmp = NamedTempFile::new().unwrap();
1121        let path = tmp.path();
1122
1123        {
1124            // Create a VCF writer with the same header as the input VCF
1125            let mut writer = Writer::from_path(
1126                path,
1127                &Header::from_template(vcf.header()),
1128                true,
1129                Format::Vcf,
1130            )
1131            .unwrap();
1132
1133            // Modify record template by adding different genotypes and write the to the temp file.
1134            let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1135            rec_tpl
1136                .push_genotype_structured(
1137                    &[
1138                        vec![GenotypeAllele::Unphased(0)],
1139                        vec![GenotypeAllele::Unphased(1)],
1140                    ],
1141                    3,
1142                )
1143                .unwrap();
1144            writer.write(&rec_tpl).unwrap();
1145            rec_tpl
1146                .push_genotype_structured(
1147                    &[
1148                        vec![GenotypeAllele::Unphased(0), GenotypeAllele::Unphased(1)],
1149                        vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
1150                    ],
1151                    3,
1152                )
1153                .unwrap();
1154            writer.write(&rec_tpl).unwrap();
1155            rec_tpl
1156                .push_genotype_structured(
1157                    &[
1158                        vec![GenotypeAllele::Unphased(1), GenotypeAllele::Phased(0)],
1159                        vec![
1160                            GenotypeAllele::Unphased(1),
1161                            GenotypeAllele::Unphased(1),
1162                            GenotypeAllele::Phased(0),
1163                        ],
1164                    ],
1165                    3,
1166                )
1167                .unwrap();
1168            writer.write(&rec_tpl).unwrap();
1169        }
1170
1171        // Read back the temporary file with the modified VCF data
1172        let mut reader = Reader::from_path(path).unwrap();
1173
1174        // Expected genotypes for validation
1175        let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1176
1177        // Iterate over the records in the temporary file and validate the genotypes
1178        for (rec, exp_gts) in reader.records().zip(expected.iter()) {
1179            let rec = rec.expect("Error reading record");
1180            let genotypes = rec.genotypes().expect("Error reading genotypes");
1181
1182            // Compare each genotype with the expected value
1183            for (sample, exp_gt) in exp_gts.iter().enumerate() {
1184                assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1185            }
1186        }
1187    }
1188
1189    #[test]
1190    fn test_genotypes_wrong_max_ploidy() {
1191        let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1192
1193        // Modify record template by adding different genotypes and write the to the temp file.
1194        let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1195        let err = rec_tpl
1196            .push_genotype_structured(
1197                &[
1198                    vec![
1199                        GenotypeAllele::Unphased(0),
1200                        GenotypeAllele::Unphased(1),
1201                        GenotypeAllele::Unphased(0),
1202                    ],
1203                    vec![
1204                        GenotypeAllele::Unphased(1),
1205                        GenotypeAllele::Unphased(0),
1206                        GenotypeAllele::Unphased(1),
1207                        GenotypeAllele::Unphased(0),
1208                    ],
1209                ],
1210                3,
1211            )
1212            .expect_err(
1213                "This should fail since there are more alleles specified (4 for second sample) than max_ploidy (3) suggests",
1214            );
1215        assert_eq!(err, crate::errors::Error::BcfSetValues);
1216    }
1217
1218    #[test]
1219    fn test_header_ids() {
1220        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1221        let header = &vcf.header();
1222        use crate::bcf::header::Id;
1223
1224        assert_eq!(header.id_to_name(Id(4)), b"GT");
1225        assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
1226        assert!(header.name_to_id(b"XX").is_err());
1227    }
1228
1229    #[test]
1230    fn test_header_samples() {
1231        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1232        let header = &vcf.header();
1233
1234        assert_eq!(header.id_to_sample(Id(0)), b"one");
1235        assert_eq!(header.id_to_sample(Id(1)), b"two");
1236        assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
1237        assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
1238        assert!(header.sample_to_id(b"three").is_err());
1239    }
1240
1241    #[test]
1242    fn test_header_contigs() {
1243        let vcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
1244        let header = &vcf.header();
1245
1246        assert_eq!(header.contig_count(), 86);
1247
1248        // test existing contig names and IDs
1249        assert_eq!(header.rid2name(0).unwrap(), b"1");
1250        assert_eq!(header.name2rid(b"1").unwrap(), 0);
1251
1252        assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
1253        assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
1254
1255        // test nonexistent contig names and IDs
1256        assert!(header.name2rid(b"nonexistent_contig").is_err());
1257        assert!(header.rid2name(100).is_err());
1258    }
1259
1260    #[test]
1261    fn test_header_records() {
1262        let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1263        let records = vcf.header().header_records();
1264        assert_eq!(records.len(), 10);
1265
1266        match records[1] {
1267            HeaderRecord::Filter {
1268                ref key,
1269                ref values,
1270            } => {
1271                assert_eq!(key, "FILTER");
1272                assert_eq!(values["ID"], "PASS");
1273            }
1274            _ => {
1275                panic!("Invalid HeaderRecord");
1276            }
1277        }
1278    }
1279
1280    #[test]
1281    fn test_header_info_types() {
1282        let vcf = Reader::from_path("test/test.bcf").unwrap();
1283        let header = vcf.header();
1284        let truth = vec![
1285            (
1286                // INFO=<ID=INDEL,Number=0,Type=Flag>
1287                "INDEL",
1288                header::TagType::Flag,
1289                header::TagLength::Fixed(0),
1290            ),
1291            (
1292                // INFO=<ID=DP,Number=1,Type=Integer>
1293                "DP",
1294                header::TagType::Integer,
1295                header::TagLength::Fixed(1),
1296            ),
1297            (
1298                // INFO=<ID=QS,Number=R,Type=Float>
1299                "QS",
1300                header::TagType::Float,
1301                header::TagLength::Alleles,
1302            ),
1303            (
1304                // INFO=<ID=I16,Number=16,Type=Float>
1305                "I16",
1306                header::TagType::Float,
1307                header::TagLength::Fixed(16),
1308            ),
1309        ];
1310        for (ref_name, ref_type, ref_length) in truth {
1311            let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1312            assert_eq!(tag_type, ref_type);
1313            assert_eq!(tag_length, ref_length);
1314        }
1315
1316        let vcf = Reader::from_path("test/test_svlen.vcf").unwrap();
1317        let header = vcf.header();
1318        let truth = vec![
1319            (
1320                // INFO=<ID=IMPRECISE,Number=0,Type=Flag>
1321                "IMPRECISE",
1322                header::TagType::Flag,
1323                header::TagLength::Fixed(0),
1324            ),
1325            (
1326                // INFO=<ID=SVTYPE,Number=1,Type=String>
1327                "SVTYPE",
1328                header::TagType::String,
1329                header::TagLength::Fixed(1),
1330            ),
1331            (
1332                // INFO=<ID=SVLEN,Number=.,Type=Integer>
1333                "SVLEN",
1334                header::TagType::Integer,
1335                header::TagLength::Variable,
1336            ),
1337            (
1338                // INFO<ID=CIGAR,Number=A,Type=String>
1339                "CIGAR",
1340                header::TagType::String,
1341                header::TagLength::AltAlleles,
1342            ),
1343        ];
1344        for (ref_name, ref_type, ref_length) in truth {
1345            let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1346            assert_eq!(tag_type, ref_type);
1347            assert_eq!(tag_length, ref_length);
1348        }
1349
1350        assert!(header.info_type(b"NOT_THERE").is_err());
1351    }
1352
1353    #[test]
1354    fn test_remove_alleles() {
1355        let mut bcf = Reader::from_path("test/test_multi.bcf").unwrap();
1356        for res in bcf.records() {
1357            let mut record = res.unwrap();
1358            if record.pos() == 10080 {
1359                record.remove_alleles(&[false, false, true]).unwrap();
1360                assert_eq!(record.alleles(), [b"A", b"C"]);
1361            }
1362        }
1363    }
1364
1365    // Helper function reading full file into string.
1366    fn read_all<P: AsRef<Path>>(path: P) -> String {
1367        let mut file = File::open(path.as_ref())
1368            .unwrap_or_else(|_| panic!("Unable to open the file: {:?}", path.as_ref()));
1369        let mut contents = String::new();
1370        file.read_to_string(&mut contents)
1371            .unwrap_or_else(|_| panic!("Unable to read the file: {:?}", path.as_ref()));
1372        contents
1373    }
1374
1375    // Open `test_various.vcf`, add a record from scratch to it and write it out again.
1376    //
1377    // This exercises the full functionality of updating information in a `record::Record`.
1378    #[test]
1379    fn test_write_various() {
1380        // Open reader, then create writer.
1381        let tmp = tempfile::Builder::new()
1382            .prefix("rust-htslib")
1383            .tempdir()
1384            .expect("Cannot create temp dir");
1385        let out_path = tmp.path().join("test_various.out.vcf");
1386
1387        let vcf = Reader::from_path("test/test_various.vcf").expect("Error opening file.");
1388        // The writer goes into its own block so we can ensure that the file is closed and
1389        // all data is written below.
1390        {
1391            let mut writer = Writer::from_path(
1392                &out_path,
1393                &Header::from_template(vcf.header()),
1394                true,
1395                Format::Vcf,
1396            )
1397            .expect("Error opening file.");
1398
1399            // Setup empty record, filled below.
1400            let mut record = writer.empty_record();
1401
1402            record.set_rid(Some(0));
1403            assert_eq!(record.rid().unwrap(), 0);
1404
1405            record.set_pos(12);
1406            assert_eq!(record.pos(), 12);
1407
1408            assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1409            record.set_id(b"to_be_cleared").unwrap();
1410            assert_eq!(
1411                str::from_utf8(record.id().as_ref()).unwrap(),
1412                "to_be_cleared"
1413            );
1414            record.clear_id().unwrap();
1415            assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1416            record.set_id(b"first_id").unwrap();
1417            record.push_id(b"second_id").unwrap();
1418            record.push_id(b"first_id").unwrap();
1419
1420            assert!(record.filters().next().is_none());
1421            record.set_filters(&["q10".as_bytes()]).unwrap();
1422            record.push_filter("s50".as_bytes()).unwrap();
1423            record.remove_filter("q10".as_bytes(), true).unwrap();
1424            record.push_filter("q10".as_bytes()).unwrap();
1425
1426            record.set_alleles(&[b"C", b"T", b"G"]).unwrap();
1427
1428            record.set_qual(10.0);
1429
1430            record.push_info_integer(b"N1", &[32]).unwrap();
1431            record.push_info_float(b"F1", &[33.0]).unwrap();
1432            record.push_info_string(b"S1", &[b"fourtytwo"]).unwrap();
1433            record.push_info_flag(b"X1").unwrap();
1434
1435            record
1436                .push_genotypes(&[
1437                    GenotypeAllele::Unphased(0),
1438                    GenotypeAllele::Unphased(1),
1439                    GenotypeAllele::Unphased(1),
1440                    GenotypeAllele::Phased(1),
1441                ])
1442                .unwrap();
1443
1444            record
1445                .push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
1446                .unwrap();
1447            record.push_format_integer(b"FF1", &[43, 11]).unwrap();
1448            record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
1449            record
1450                .push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
1451                .unwrap();
1452
1453            // Finally, write out the record.
1454            writer.write(&record).unwrap();
1455        }
1456
1457        // Now, compare expected and real output.
1458        let expected = read_all("test/test_various.out.vcf");
1459        let actual = read_all(&out_path);
1460        assert_eq!(expected, actual);
1461    }
1462
1463    #[test]
1464    fn test_remove_headers() {
1465        let vcf = Reader::from_path("test/test_headers.vcf").expect("Error opening file.");
1466        let tmp = tempfile::Builder::new()
1467            .prefix("rust-htslib")
1468            .tempdir()
1469            .expect("Cannot create temp dir");
1470        let vcfpath = tmp.path().join("test.vcf");
1471        let mut header = Header::from_template(&vcf.header);
1472        header
1473            .remove_contig(b"contig2")
1474            .remove_info(b"INFO2")
1475            .remove_format(b"FORMAT2")
1476            .remove_filter(b"FILTER2")
1477            .remove_structured(b"Foo2")
1478            .remove_generic(b"Bar2");
1479        {
1480            let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::Vcf)
1481                .expect("Error opening output file.");
1482            // Note that we don't need to write anything, we are just looking at the header.
1483        }
1484
1485        let expected = read_all("test/test_headers.out.vcf");
1486        let actual = read_all(&vcfpath);
1487        assert_eq!(expected, actual);
1488    }
1489
1490    #[test]
1491    fn test_synced_reader() {
1492        let mut reader = synced::SyncedReader::new().unwrap();
1493        reader.set_require_index(true);
1494        reader.set_pairing(synced::pairing::SNPS);
1495
1496        assert_eq!(reader.reader_count(), 0);
1497        reader.add_reader("test/test_left.vcf.gz").unwrap();
1498        reader.add_reader("test/test_right.vcf.gz").unwrap();
1499        assert_eq!(reader.reader_count(), 2);
1500
1501        let res1 = reader.read_next();
1502        assert_eq!(res1.unwrap(), 2);
1503        assert!(reader.has_line(0));
1504        assert!(reader.has_line(1));
1505
1506        let res2 = reader.read_next();
1507        assert_eq!(res2.unwrap(), 1);
1508        assert!(reader.has_line(0));
1509        assert!(!reader.has_line(1));
1510
1511        let res3 = reader.read_next();
1512        assert_eq!(res3.unwrap(), 1);
1513        assert!(!reader.has_line(0));
1514        assert!(reader.has_line(1));
1515
1516        let res4 = reader.read_next();
1517        assert_eq!(res4.unwrap(), 0);
1518    }
1519
1520    #[test]
1521    fn test_synced_reader_fetch() {
1522        let mut reader = synced::SyncedReader::new().unwrap();
1523        reader.set_require_index(true);
1524        reader.set_pairing(synced::pairing::SNPS);
1525
1526        assert_eq!(reader.reader_count(), 0);
1527        reader.add_reader("test/test_left.vcf.gz").unwrap();
1528        reader.add_reader("test/test_right.vcf.gz").unwrap();
1529        assert_eq!(reader.reader_count(), 2);
1530
1531        reader.fetch(0, 0, 1000).unwrap();
1532        let res1 = reader.read_next();
1533        assert_eq!(res1.unwrap(), 2);
1534        assert!(reader.has_line(0));
1535        assert!(reader.has_line(1));
1536
1537        let res2 = reader.read_next();
1538        assert_eq!(res2.unwrap(), 1);
1539        assert!(reader.has_line(0));
1540        assert!(!reader.has_line(1));
1541
1542        let res3 = reader.read_next();
1543        assert_eq!(res3.unwrap(), 1);
1544        assert!(!reader.has_line(0));
1545        assert!(reader.has_line(1));
1546
1547        let res4 = reader.read_next();
1548        assert_eq!(res4.unwrap(), 0);
1549    }
1550
1551    #[test]
1552    fn test_svlen() {
1553        let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
1554
1555        let mut record = reader.empty_record();
1556        reader.read(&mut record).unwrap().unwrap();
1557
1558        assert_eq!(
1559            *record.info(b"SVLEN").integer().unwrap().unwrap(),
1560            &[-127][..]
1561        );
1562    }
1563
1564    #[test]
1565    fn test_fails_on_bam() {
1566        let reader = Reader::from_path("test/test.bam");
1567        assert!(reader.is_err());
1568    }
1569
1570    #[test]
1571    fn test_fails_on_non_existiant() {
1572        let reader = Reader::from_path("test/no_such_file");
1573        assert!(reader.is_err());
1574    }
1575
1576    #[test]
1577    fn test_multi_string_info_tag() {
1578        let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
1579        let mut rec = reader.empty_record();
1580        let _ = reader.read(&mut rec);
1581
1582        assert_eq!(
1583            rec.info_shared_buffer(b"ANN", Buffer::new())
1584                .string()
1585                .unwrap()
1586                .unwrap()
1587                .len(),
1588            14
1589        );
1590    }
1591
1592    #[test]
1593    fn test_multi_string_info_tag_number_a() {
1594        let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
1595        let mut rec = reader.empty_record();
1596        let _ = reader.read(&mut rec);
1597
1598        assert_eq!(
1599            rec.info_shared_buffer(b"X", Buffer::new())
1600                .string()
1601                .unwrap()
1602                .unwrap()
1603                .len(),
1604            2
1605        );
1606    }
1607
1608    #[test]
1609    fn test_genotype_allele_conversion() {
1610        let allele = GenotypeAllele::Unphased(1);
1611        let converted: i32 = allele.into();
1612        let expected = 4;
1613        assert_eq!(converted, expected);
1614        let reverse_conversion = GenotypeAllele::from(expected);
1615        assert_eq!(allele, reverse_conversion);
1616    }
1617
1618    #[test]
1619    fn test_genotype_missing_allele_conversion() {
1620        let allele = GenotypeAllele::PhasedMissing;
1621        let converted: i32 = allele.into();
1622        let expected = 1;
1623        assert_eq!(converted, expected);
1624        let reverse_conversion = GenotypeAllele::from(expected);
1625        assert_eq!(allele, reverse_conversion);
1626    }
1627
1628    #[test]
1629    fn test_alt_allele_dosage() {
1630        let path = &"test/test_string.vcf";
1631        let mut bcf = Reader::from_path(path).expect("Error opening file.");
1632        let _header = bcf.header();
1633        // FORMAT fields of first record of the vcf should look like:
1634        // GT:FS1:FN1	./1:LongString1:1	1/1:ss1:2
1635        let first_record = bcf.records().next().unwrap().expect("Fail to read record");
1636        let sample_count = usize::try_from(first_record.sample_count()).unwrap();
1637        assert_eq!(sample_count, 2);
1638        let mut n_ref = vec![0; sample_count];
1639        let mut n_alt = vec![0; sample_count];
1640        let mut n_missing = vec![0; sample_count];
1641        let gts = first_record.genotypes().expect("Error reading genotypes");
1642        for sample_index in 0..sample_count {
1643            // for each sample
1644            for gta in gts.get(sample_index).iter() {
1645                // for each allele
1646                match gta.index() {
1647                    Some(0) => n_ref[sample_index] += 1,  // reference allele
1648                    Some(_) => n_alt[sample_index] += 1,  // alt allele
1649                    None => n_missing[sample_index] += 1, // missing allele
1650                }
1651            }
1652        }
1653        assert_eq!(n_ref, [0, 0]);
1654        assert_eq!(n_alt, [1, 2]);
1655        assert_eq!(n_missing, [1, 0]);
1656    }
1657
1658    #[test]
1659    fn test_obs_cornercase() {
1660        let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1661        let first_record = reader
1662            .records()
1663            .next()
1664            .unwrap()
1665            .expect("Fail to read record");
1666
1667        assert_eq!(
1668            *first_record.info(b"EVENT").string().unwrap().unwrap(),
1669            [b"gridss33fb_1085"]
1670        );
1671        assert_eq!(
1672            *first_record.info(b"MATEID").string().unwrap().unwrap(),
1673            [b"gridss33fb_1085h"]
1674        );
1675    }
1676
1677    #[test]
1678    fn test_trailing_omitted_format_fields() {
1679        let mut reader = Reader::from_path("test/test_trailing_omitted_format.vcf").unwrap();
1680        let first_record = reader
1681            .records()
1682            .next()
1683            .unwrap()
1684            .expect("Fail to read record");
1685
1686        let expected: Vec<&[u8]> = Vec::new();
1687        assert_eq!(*first_record.format(b"STR").string().unwrap(), expected,);
1688        assert_eq!(
1689            *first_record.format(b"INT").integer().unwrap(),
1690            vec![&[i32::missing()]],
1691        );
1692        assert!(first_record.format(b"FLT").float().unwrap()[0][0].is_nan(),);
1693    }
1694
1695    // #[test]
1696    // fn test_buffer_lifetime() {
1697    //     let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1698    //     let first_record = reader
1699    //         .records()
1700    //         .next()
1701    //         .unwrap()
1702    //         .expect("Fail to read record");
1703
1704    //     fn get_value<'a, 'b>(record: &'a Record) -> &'b [u8] {
1705    //         // FIXME: this should not be possible, because the slice outlives the buffer.
1706    //         let buffer: BufferBacked<'b, _, _> = record.info(b"EVENT").string().unwrap().unwrap();
1707    //         let value: &'b [u8] = buffer[0];
1708    //         value
1709    //     }
1710
1711    //     let buffered = first_record.info(b"EVENT").string().unwrap().unwrap();
1712    //     assert_eq!(get_value(&first_record), buffered[0]);
1713    // }
1714}