1use std::ffi;
107use std::path::Path;
108use std::rc::Rc;
109use std::str;
110
111use url::Url;
112
113pub mod buffer;
114pub mod header;
115pub mod index;
116pub mod record;
117
118use crate::bcf::header::{HeaderView, SampleSubset};
119use crate::errors::{Error, Result};
120use crate::htslib;
121
122pub use crate::bcf::header::{Header, HeaderRecord};
123pub use crate::bcf::record::Record;
124
125pub trait Read: Sized {
127 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
136
137 fn records(&mut self) -> Records<'_, Self>;
139
140 fn header(&self) -> &HeaderView;
142
143 fn empty_record(&self) -> Record;
145
146 fn set_threads(&mut self, n_threads: usize) -> Result<()>;
156}
157
158#[derive(Debug)]
160pub struct Reader {
161 inner: *mut htslib::htsFile,
162 header: Rc<HeaderView>,
163}
164
165unsafe impl Send for Reader {}
166pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
170 assert!(n_threads > 0, "n_threads must be > 0");
171
172 let r = htslib::hts_set_threads(hts_file, n_threads as i32);
173 if r != 0 {
174 Err(Error::SetThreads)
175 } else {
176 Ok(())
177 }
178}
179
180impl Reader {
181 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
183 match path.as_ref().to_str() {
184 Some(p) if !path.as_ref().exists() => Err(Error::FileNotFound { path: p.into() }),
185 Some(p) => Self::new(p.as_bytes()),
186 _ => Err(Error::NonUnicodePath),
187 }
188 }
189
190 pub fn from_url(url: &Url) -> Result<Self> {
192 Self::new(url.as_str().as_bytes())
193 }
194
195 pub fn from_stdin() -> Result<Self> {
197 Self::new(b"-")
198 }
199
200 fn new(path: &[u8]) -> Result<Self> {
201 let htsfile = bcf_open(path, b"r")?;
202 let header = unsafe { htslib::bcf_hdr_read(htsfile) };
203 Ok(Reader {
204 inner: htsfile,
205 header: Rc::new(HeaderView::new(header)),
206 })
207 }
208}
209
210impl Read for Reader {
211 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
212 match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
213 0 => {
214 unsafe {
215 htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
217 }
218 record.set_header(Rc::clone(&self.header));
219 Some(Ok(()))
220 }
221 -1 => None,
222 _ => Some(Err(Error::BcfInvalidRecord)),
223 }
224 }
225
226 fn records(&mut self) -> Records<'_, Self> {
227 Records { reader: self }
228 }
229
230 fn set_threads(&mut self, n_threads: usize) -> Result<()> {
231 unsafe { set_threads(self.inner, n_threads) }
232 }
233
234 fn header(&self) -> &HeaderView {
235 &self.header
236 }
237
238 fn empty_record(&self) -> Record {
240 self.header.empty_record()
241 }
242}
243
244impl Drop for Reader {
245 fn drop(&mut self) {
246 unsafe {
247 htslib::hts_close(self.inner);
248 }
249 }
250}
251
252#[derive(Debug)]
254pub struct IndexedReader {
255 inner: *mut htslib::bcf_srs_t,
257 header: Rc<HeaderView>,
259
260 current_region: Option<(u32, u64, Option<u64>)>,
262}
263
264unsafe impl Send for IndexedReader {}
265
266impl IndexedReader {
267 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
273 let path = path.as_ref();
274 match path.to_str() {
275 Some(p) if path.exists() => {
276 Self::new(&ffi::CString::new(p).map_err(|_| Error::NonUnicodePath)?)
277 }
278 Some(p) => Err(Error::FileNotFound { path: p.into() }),
279 None => Err(Error::NonUnicodePath),
280 }
281 }
282
283 pub fn from_url(url: &Url) -> Result<Self> {
285 Self::new(&ffi::CString::new(url.as_str()).unwrap())
286 }
287
288 fn new(path: &ffi::CStr) -> Result<Self> {
294 let ser_reader = unsafe { htslib::bcf_sr_init() };
296 unsafe {
297 htslib::bcf_sr_set_opt(ser_reader, 0);
298 } if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
301 let header = Rc::new(HeaderView::new(unsafe {
302 htslib::bcf_hdr_dup((*(*ser_reader).readers.offset(0)).header)
303 }));
304 Ok(IndexedReader {
305 inner: ser_reader,
306 header,
307 current_region: None,
308 })
309 } else {
310 Err(Error::BcfOpen {
311 target: path.to_str().unwrap().to_owned(),
312 })
313 }
314 }
315
316 pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
329 let contig = self.header.rid2name(rid)?;
330 let contig = ffi::CString::new(contig).unwrap();
331 if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
332 Err(Error::GenomicSeek {
333 contig: contig.to_str().unwrap().to_owned(),
334 start,
335 })
336 } else {
337 self.current_region = Some((rid, start, end));
338 Ok(())
339 }
340 }
341}
342
343impl Read for IndexedReader {
344 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
345 match unsafe { htslib::bcf_sr_next_line(self.inner) } {
346 0 => {
347 if unsafe { (*self.inner).errnum } != 0 {
348 Some(Err(Error::BcfInvalidRecord))
349 } else {
350 None
351 }
352 }
353 i => {
354 assert!(i > 0, "Must not be negative");
355 unsafe {
360 htslib::bcf_copy(
361 record.inner,
362 *(*(*self.inner).readers.offset(0)).buffer.offset(0),
363 );
364 }
365
366 unsafe {
367 htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
369 }
370
371 record.set_header(Rc::clone(&self.header));
372
373 match self.current_region {
374 Some((rid, _start, end)) => {
375 let endpos = match end {
376 Some(e) => e,
377 None => u64::MAX,
378 };
379 if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
380 Some(Ok(()))
381 } else {
382 None
383 }
384 }
385 None => Some(Ok(())),
386 }
387 }
388 }
389 }
390
391 fn records(&mut self) -> Records<'_, Self> {
392 Records { reader: self }
393 }
394
395 fn set_threads(&mut self, n_threads: usize) -> Result<()> {
396 assert!(n_threads > 0, "n_threads must be > 0");
397
398 let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
399 if r != 0 {
400 Err(Error::SetThreads)
401 } else {
402 Ok(())
403 }
404 }
405
406 fn header(&self) -> &HeaderView {
407 &self.header
408 }
409
410 fn empty_record(&self) -> Record {
411 Record::new(Rc::clone(&self.header))
412 }
413}
414
415impl Drop for IndexedReader {
416 fn drop(&mut self) {
417 unsafe { htslib::bcf_sr_destroy(self.inner) };
418 }
419}
420
421pub mod synced {
423
424 use super::*;
425
426 pub mod pairing {
428 pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
430 pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
432 pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
434 pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
436 pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
438 pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
440 pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
442 pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
444 pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
446 }
447
448 #[derive(Debug)]
450 pub struct SyncedReader {
451 inner: *mut crate::htslib::bcf_srs_t,
453
454 headers: Vec<Rc<HeaderView>>,
456
457 current_region: Option<(u32, u64, u64)>,
459 }
460
461 impl SyncedReader {
463 pub fn new() -> Result<Self> {
464 let inner = unsafe { crate::htslib::bcf_sr_init() };
465 if inner.is_null() {
466 return Err(Error::BcfAllocationError);
467 }
468
469 Ok(SyncedReader {
470 inner,
471 headers: Vec::new(),
472 current_region: None,
473 })
474 }
475
476 pub fn set_require_index(&mut self, do_require: bool) {
478 unsafe {
479 (*self.inner).require_index = if do_require { 1 } else { 0 };
480 }
481 }
482
483 pub fn set_pairing(&mut self, bitmask: u32) {
485 unsafe {
486 crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
488 }
489 }
490
491 pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
493 match path.as_ref().to_str() {
494 Some(p) if path.as_ref().exists() => {
495 let p_cstring = ffi::CString::new(p).unwrap();
496 let res =
497 unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
498
499 if res == 0 {
500 return Err(Error::BcfOpen {
501 target: p.to_owned(),
502 });
503 }
504
505 let i = (self.reader_count() - 1) as isize;
506 let header = Rc::new(HeaderView::new(unsafe {
507 crate::htslib::bcf_hdr_dup((*(*self.inner).readers.offset(i)).header)
508 }));
509 self.headers.push(header);
510 Ok(())
511 }
512 _ => Err(Error::NonUnicodePath),
513 }
514 }
515
516 pub fn remove_reader(&mut self, idx: u32) {
518 if idx >= self.reader_count() {
519 panic!("Invalid reader!");
520 } else {
521 unsafe {
522 crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
523 }
524 self.headers.remove(idx as usize);
525 }
526 }
527
528 pub fn reader_count(&self) -> u32 {
530 unsafe { (*self.inner).nreaders as u32 }
531 }
532
533 pub fn read_next(&mut self) -> Result<u32> {
535 let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
536
537 if num == 0 {
538 if unsafe { (*self.inner).errnum } != 0 {
539 return Err(Error::BcfInvalidRecord);
540 }
541 Ok(0)
542 } else {
543 assert!(num > 0, "num returned by htslib must not be negative");
544 match self.current_region {
545 Some((rid, _start, end)) => {
546 for idx in 0..self.reader_count() {
547 if !self.has_line(idx) {
548 continue;
549 }
550 unsafe {
551 let record = *(*(*self.inner).readers.offset(idx as isize))
552 .buffer
553 .offset(0);
554 if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
555 return Ok(0);
556 }
557 }
558 }
559 Ok(num)
560 }
561 None => Ok(num),
562 }
563 }
564 }
565
566 pub fn has_line(&self, idx: u32) -> bool {
568 if idx >= self.reader_count() {
569 panic!("Invalid reader!");
570 } else {
571 unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
572 }
573 }
574
575 pub fn record(&self, idx: u32) -> Option<Record> {
577 if self.has_line(idx) {
578 let record = Record::new(self.headers[idx as usize].clone());
579 unsafe {
580 crate::htslib::bcf_copy(
581 record.inner,
582 *(*(*self.inner).readers.offset(idx as isize))
583 .buffer
584 .offset(0),
585 );
586 }
587 Some(record)
588 } else {
589 None
590 }
591 }
592
593 pub fn header(&self, idx: u32) -> &HeaderView {
595 if idx >= self.reader_count() {
597 panic!("Invalid reader!");
598 } else {
599 &self.headers[idx as usize]
600 }
601 }
602
603 pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
612 let contig = {
613 let contig = self.header(0).rid2name(rid).unwrap(); ffi::CString::new(contig).unwrap()
615 };
616 if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
617 Err(Error::GenomicSeek {
618 contig: contig.to_str().unwrap().to_owned(),
619 start,
620 })
621 } else {
622 self.current_region = Some((rid, start, end));
623 Ok(())
624 }
625 }
626 }
627
628 impl Drop for SyncedReader {
629 fn drop(&mut self) {
630 unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
631 }
632 }
633}
634
635#[derive(Clone, Copy, Debug, PartialEq, Eq)]
636pub enum Format {
637 Vcf,
638 Bcf,
639}
640
641#[derive(Debug)]
643pub struct Writer {
644 inner: *mut htslib::htsFile,
645 header: Rc<HeaderView>,
646 subset: Option<SampleSubset>,
647}
648
649unsafe impl Send for Writer {}
650
651impl Writer {
652 pub fn from_path<P: AsRef<Path>>(
661 path: P,
662 header: &Header,
663 uncompressed: bool,
664 format: Format,
665 ) -> Result<Self> {
666 if let Some(p) = path.as_ref().to_str() {
667 Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
668 } else {
669 Err(Error::NonUnicodePath)
670 }
671 }
672
673 pub fn from_url(
682 url: &Url,
683 header: &Header,
684 uncompressed: bool,
685 format: Format,
686 ) -> Result<Self> {
687 Self::new(url.as_str().as_bytes(), header, uncompressed, format)
688 }
689
690 pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
698 Self::new(b"-", header, uncompressed, format)
699 }
700
701 fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
702 let mode: &[u8] = match (uncompressed, format) {
703 (true, Format::Vcf) => b"w",
704 (false, Format::Vcf) => b"wz",
705 (true, Format::Bcf) => b"wbu",
706 (false, Format::Bcf) => b"wb",
707 };
708
709 let htsfile = bcf_open(path, mode)?;
710 unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
711 Ok(Writer {
712 inner: htsfile,
713 header: Rc::new(HeaderView::new(unsafe {
714 htslib::bcf_hdr_dup(header.inner)
715 })),
716 subset: header.subset.clone(),
717 })
718 }
719
720 pub fn header(&self) -> &HeaderView {
722 &self.header
723 }
724
725 pub fn empty_record(&self) -> Record {
729 record::Record::new(Rc::clone(&self.header))
730 }
731
732 pub fn translate(&mut self, record: &mut record::Record) {
738 unsafe {
739 htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
740 }
741 record.set_header(Rc::clone(&self.header));
742 }
743
744 pub fn subset(&mut self, record: &mut record::Record) {
750 if let Some(ref mut subset) = self.subset {
751 unsafe {
752 htslib::bcf_subset(
753 self.header.inner,
754 record.inner,
755 subset.len() as i32,
756 subset.as_mut_ptr(),
757 );
758 }
759 }
760 }
761
762 pub fn write(&mut self, record: &record::Record) -> Result<()> {
768 if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
769 Err(Error::WriteRecord)
770 } else {
771 Ok(())
772 }
773 }
774
775 pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
782 unsafe { set_threads(self.inner, n_threads) }
783 }
784}
785
786impl Drop for Writer {
787 fn drop(&mut self) {
788 unsafe {
789 htslib::hts_close(self.inner);
790 }
791 }
792}
793
794#[derive(Debug)]
795pub struct Records<'a, R: Read> {
796 reader: &'a mut R,
797}
798
799impl<R: Read> Iterator for Records<'_, R> {
800 type Item = Result<record::Record>;
801
802 fn next(&mut self) -> Option<Result<record::Record>> {
803 let mut record = self.reader.empty_record();
804 match self.reader.read(&mut record) {
805 Some(Err(e)) => Some(Err(e)),
806 Some(Ok(_)) => Some(Ok(record)),
807 None => None,
808 }
809 }
810}
811
812fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
814 let p = ffi::CString::new(target).unwrap();
815 let c_str = ffi::CString::new(mode).unwrap();
816 let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
817
818 if ret.is_null() {
819 return Err(Error::BcfOpen {
820 target: str::from_utf8(target).unwrap().to_owned(),
821 });
822 }
823
824 unsafe {
825 if !(mode.contains(&b'w')
826 || (*ret).format.category == htslib::htsFormatCategory_variant_data)
827 {
828 return Err(Error::BcfOpen {
829 target: str::from_utf8(target).unwrap().to_owned(),
830 });
831 }
832 }
833 Ok(ret)
834}
835
836#[cfg(test)]
837mod tests {
838 use tempfile::NamedTempFile;
839
840 use super::record::Buffer;
841 use super::*;
842 use crate::bcf::header::Id;
843 use crate::bcf::record::GenotypeAllele;
844 use crate::bcf::record::Numeric;
845 use crate::bcf::Reader;
846 use std::convert::TryFrom;
847 use std::fs::File;
848 use std::io::prelude::Read as IoRead;
849 use std::path::Path;
850 use std::str;
851
852 fn _test_read<P: AsRef<Path>>(path: &P) {
853 let mut bcf = Reader::from_path(path).expect("Error opening file.");
854 assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
855
856 for (i, rec) in bcf.records().enumerate() {
857 let record = rec.expect("Error reading record.");
858 assert_eq!(record.sample_count(), 1);
859
860 assert_eq!(record.rid().expect("Error reading rid."), 0);
861 assert_eq!(record.pos(), 10021 + i as i64);
862 assert!((record.qual() - 0f32).abs() < f32::EPSILON);
863 let mut buffer = Buffer::new();
864 assert!(
865 (record
866 .info_shared_buffer(b"MQ0F", &mut buffer)
867 .float()
868 .expect("Error reading info.")
869 .expect("Missing tag")[0]
870 - 1.0)
871 .abs()
872 < f32::EPSILON
873 );
874 if i == 59 {
875 assert!(
876 (record
877 .info_shared_buffer(b"SGB", &mut buffer)
878 .float()
879 .expect("Error reading info.")
880 .expect("Missing tag")[0]
881 - -0.379885)
882 .abs()
883 < f32::EPSILON
884 );
885 }
886 assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
888
889 let fmt = record.format(b"PL");
890 let pl = fmt.integer().expect("Error reading format.");
891 assert_eq!(pl.len(), 1);
892 if i == 59 {
893 assert_eq!(pl[0].len(), 6);
894 } else {
895 assert_eq!(pl[0].len(), 3);
896 }
897 }
898 }
899
900 #[test]
901 fn test_read() {
902 _test_read(&"test/test.bcf");
903 }
904
905 #[test]
906 fn test_reader_set_threads() {
907 let path = &"test/test.bcf";
908 let mut bcf = Reader::from_path(path).expect("Error opening file.");
909 bcf.set_threads(2).unwrap();
910 }
911
912 #[test]
913 fn test_writer_set_threads() {
914 let path = &"test/test.bcf";
915 let tmp = tempfile::Builder::new()
916 .prefix("rust-htslib")
917 .tempdir()
918 .expect("Cannot create temp dir");
919 let bcfpath = tmp.path().join("test.bcf");
920 let bcf = Reader::from_path(path).expect("Error opening file.");
921 let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
922 .expect("Error subsetting samples.");
923 let mut writer =
924 Writer::from_path(&bcfpath, &header, false, Format::Bcf).expect("Error opening file.");
925 writer.set_threads(2).unwrap();
926 }
927
928 #[test]
929 fn test_fetch() {
930 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
931 bcf.set_threads(2).unwrap();
932 let rid = bcf
933 .header()
934 .name2rid(b"1")
935 .expect("Translating from contig '1' to ID failed.");
936 bcf.fetch(rid, 10_033, Some(10_060))
937 .expect("Fetching failed");
938 assert_eq!(bcf.records().count(), 28);
939 }
940
941 #[test]
942 fn test_fetch_all() {
943 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
944 bcf.set_threads(2).unwrap();
945 let rid = bcf
946 .header()
947 .name2rid(b"1")
948 .expect("Translating from contig '1' to ID failed.");
949 bcf.fetch(rid, 0, None).expect("Fetching failed");
950 assert_eq!(bcf.records().count(), 62);
951 }
952
953 #[test]
954 fn test_fetch_open_ended() {
955 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
956 bcf.set_threads(2).unwrap();
957 let rid = bcf
958 .header()
959 .name2rid(b"1")
960 .expect("Translating from contig '1' to ID failed.");
961 bcf.fetch(rid, 10077, None).expect("Fetching failed");
962 assert_eq!(bcf.records().count(), 6);
963 }
964
965 #[test]
966 fn test_fetch_start_greater_than_last_vcf_pos() {
967 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
968 bcf.set_threads(2).unwrap();
969 let rid = bcf
970 .header()
971 .name2rid(b"1")
972 .expect("Translating from contig '1' to ID failed.");
973 bcf.fetch(rid, 20077, None).expect("Fetching failed");
974 assert_eq!(bcf.records().count(), 0);
975 }
976
977 #[test]
978 fn test_write() {
979 let mut bcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
980 let tmp = tempfile::Builder::new()
981 .prefix("rust-htslib")
982 .tempdir()
983 .expect("Cannot create temp dir");
984 let bcfpath = tmp.path().join("test.bcf");
985 println!("{:?}", bcfpath);
986 {
987 let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
988 .expect("Error subsetting samples.");
989 let mut writer = Writer::from_path(&bcfpath, &header, false, Format::Bcf)
990 .expect("Error opening file.");
991 for rec in bcf.records() {
992 let mut record = rec.expect("Error reading record.");
993 writer.translate(&mut record);
994 writer.subset(&mut record);
995 record.trim_alleles().expect("Error trimming alleles.");
996 writer.write(&record).expect("Error writing record");
997 }
998 }
999 {
1000 _test_read(&bcfpath);
1001 }
1002 tmp.close().expect("Failed to delete temp dir");
1003 }
1004
1005 #[test]
1006 fn test_strings() {
1007 let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1008 let fs1 = [
1009 &b"LongString1"[..],
1010 &b"LongString2"[..],
1011 &b"."[..],
1012 &b"LongString4"[..],
1013 &b"evenlength"[..],
1014 &b"ss6"[..],
1015 ];
1016 let mut buffer = Buffer::new();
1017 for (i, rec) in vcf.records().enumerate() {
1018 println!("record {}", i);
1019 let record = rec.expect("Error reading record.");
1020 assert_eq!(
1021 record
1022 .info_shared_buffer(b"S1", &mut buffer)
1023 .string()
1024 .expect("Error reading string.")
1025 .expect("Missing tag")[0],
1026 format!("string{}", i + 1).as_bytes()
1027 );
1028 let fs1_str_vec = record
1029 .format_shared_buffer(b"FS1", &mut buffer)
1030 .string()
1031 .expect("Error reading string.");
1032 assert_eq!(fs1_str_vec.len(), 2);
1033 println!("{}", String::from_utf8_lossy(fs1_str_vec[0]));
1034 assert_eq!(
1035 record
1036 .format(b"FS1")
1037 .string()
1038 .expect("Error reading string.")[0],
1039 fs1[i]
1040 );
1041 }
1042 }
1043
1044 #[test]
1045 fn test_missing() {
1046 let mut vcf = Reader::from_path("test/test_missing.vcf").expect("Error opening file.");
1047 let fn4 = [
1048 &[
1049 i32::missing(),
1050 i32::missing(),
1051 i32::missing(),
1052 i32::missing(),
1053 ][..],
1054 &[i32::missing()][..],
1055 ];
1056 let f1 = [false, true];
1057 let mut buffer = Buffer::new();
1058 for (i, rec) in vcf.records().enumerate() {
1059 let record = rec.expect("Error reading record.");
1060 assert_eq!(
1061 record
1062 .info_shared_buffer(b"F1", &mut buffer)
1063 .float()
1064 .expect("Error reading float.")
1065 .expect("Missing tag")[0]
1066 .is_nan(),
1067 f1[i]
1068 );
1069 assert_eq!(
1070 record
1071 .format(b"FN4")
1072 .integer()
1073 .expect("Error reading integer.")[1],
1074 fn4[i]
1075 );
1076 assert!(
1077 record.format(b"FF4").float().expect("Error reading float.")[1]
1078 .iter()
1079 .all(|&v| v.is_missing())
1080 );
1081 }
1082 }
1083
1084 #[test]
1085 fn test_genotypes() {
1086 let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1087 let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
1088 for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
1089 let rec = rec.expect("Error reading record.");
1090 let genotypes = rec.genotypes().expect("Error reading genotypes");
1091 assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
1092 }
1093 }
1094
1095 #[test]
1096 fn test_genotypes_read_mixed_ploidy() {
1097 let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1098
1099 let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1101
1102 for (rec, exp_gts) in vcf.records().zip(expected.iter()) {
1103 let rec = rec.expect("Error reading record.");
1104
1105 let genotypes = rec.genotypes().expect("Error reading genotypes");
1107
1108 for (sample, exp_gt) in exp_gts.iter().enumerate() {
1110 assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1111 }
1112 }
1113 }
1114
1115 #[test]
1116 fn test_genotypes_write_and_read_mixed_ploidy() {
1117 let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1118
1119 let tmp = NamedTempFile::new().unwrap();
1121 let path = tmp.path();
1122
1123 {
1124 let mut writer = Writer::from_path(
1126 path,
1127 &Header::from_template(vcf.header()),
1128 true,
1129 Format::Vcf,
1130 )
1131 .unwrap();
1132
1133 let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1135 rec_tpl
1136 .push_genotype_structured(
1137 &[
1138 vec![GenotypeAllele::Unphased(0)],
1139 vec![GenotypeAllele::Unphased(1)],
1140 ],
1141 3,
1142 )
1143 .unwrap();
1144 writer.write(&rec_tpl).unwrap();
1145 rec_tpl
1146 .push_genotype_structured(
1147 &[
1148 vec![GenotypeAllele::Unphased(0), GenotypeAllele::Unphased(1)],
1149 vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
1150 ],
1151 3,
1152 )
1153 .unwrap();
1154 writer.write(&rec_tpl).unwrap();
1155 rec_tpl
1156 .push_genotype_structured(
1157 &[
1158 vec![GenotypeAllele::Unphased(1), GenotypeAllele::Phased(0)],
1159 vec![
1160 GenotypeAllele::Unphased(1),
1161 GenotypeAllele::Unphased(1),
1162 GenotypeAllele::Phased(0),
1163 ],
1164 ],
1165 3,
1166 )
1167 .unwrap();
1168 writer.write(&rec_tpl).unwrap();
1169 }
1170
1171 let mut reader = Reader::from_path(path).unwrap();
1173
1174 let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1176
1177 for (rec, exp_gts) in reader.records().zip(expected.iter()) {
1179 let rec = rec.expect("Error reading record");
1180 let genotypes = rec.genotypes().expect("Error reading genotypes");
1181
1182 for (sample, exp_gt) in exp_gts.iter().enumerate() {
1184 assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1185 }
1186 }
1187 }
1188
1189 #[test]
1190 fn test_genotypes_wrong_max_ploidy() {
1191 let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1192
1193 let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1195 let err = rec_tpl
1196 .push_genotype_structured(
1197 &[
1198 vec![
1199 GenotypeAllele::Unphased(0),
1200 GenotypeAllele::Unphased(1),
1201 GenotypeAllele::Unphased(0),
1202 ],
1203 vec![
1204 GenotypeAllele::Unphased(1),
1205 GenotypeAllele::Unphased(0),
1206 GenotypeAllele::Unphased(1),
1207 GenotypeAllele::Unphased(0),
1208 ],
1209 ],
1210 3,
1211 )
1212 .expect_err(
1213 "This should fail since there are more alleles specified (4 for second sample) than max_ploidy (3) suggests",
1214 );
1215 assert_eq!(err, crate::errors::Error::BcfSetValues);
1216 }
1217
1218 #[test]
1219 fn test_header_ids() {
1220 let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1221 let header = &vcf.header();
1222 use crate::bcf::header::Id;
1223
1224 assert_eq!(header.id_to_name(Id(4)), b"GT");
1225 assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
1226 assert!(header.name_to_id(b"XX").is_err());
1227 }
1228
1229 #[test]
1230 fn test_header_samples() {
1231 let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1232 let header = &vcf.header();
1233
1234 assert_eq!(header.id_to_sample(Id(0)), b"one");
1235 assert_eq!(header.id_to_sample(Id(1)), b"two");
1236 assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
1237 assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
1238 assert!(header.sample_to_id(b"three").is_err());
1239 }
1240
1241 #[test]
1242 fn test_header_contigs() {
1243 let vcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
1244 let header = &vcf.header();
1245
1246 assert_eq!(header.contig_count(), 86);
1247
1248 assert_eq!(header.rid2name(0).unwrap(), b"1");
1250 assert_eq!(header.name2rid(b"1").unwrap(), 0);
1251
1252 assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
1253 assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
1254
1255 assert!(header.name2rid(b"nonexistent_contig").is_err());
1257 assert!(header.rid2name(100).is_err());
1258 }
1259
1260 #[test]
1261 fn test_header_records() {
1262 let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1263 let records = vcf.header().header_records();
1264 assert_eq!(records.len(), 10);
1265
1266 match records[1] {
1267 HeaderRecord::Filter {
1268 ref key,
1269 ref values,
1270 } => {
1271 assert_eq!(key, "FILTER");
1272 assert_eq!(values["ID"], "PASS");
1273 }
1274 _ => {
1275 panic!("Invalid HeaderRecord");
1276 }
1277 }
1278 }
1279
1280 #[test]
1281 fn test_header_info_types() {
1282 let vcf = Reader::from_path("test/test.bcf").unwrap();
1283 let header = vcf.header();
1284 let truth = vec![
1285 (
1286 "INDEL",
1288 header::TagType::Flag,
1289 header::TagLength::Fixed(0),
1290 ),
1291 (
1292 "DP",
1294 header::TagType::Integer,
1295 header::TagLength::Fixed(1),
1296 ),
1297 (
1298 "QS",
1300 header::TagType::Float,
1301 header::TagLength::Alleles,
1302 ),
1303 (
1304 "I16",
1306 header::TagType::Float,
1307 header::TagLength::Fixed(16),
1308 ),
1309 ];
1310 for (ref_name, ref_type, ref_length) in truth {
1311 let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1312 assert_eq!(tag_type, ref_type);
1313 assert_eq!(tag_length, ref_length);
1314 }
1315
1316 let vcf = Reader::from_path("test/test_svlen.vcf").unwrap();
1317 let header = vcf.header();
1318 let truth = vec![
1319 (
1320 "IMPRECISE",
1322 header::TagType::Flag,
1323 header::TagLength::Fixed(0),
1324 ),
1325 (
1326 "SVTYPE",
1328 header::TagType::String,
1329 header::TagLength::Fixed(1),
1330 ),
1331 (
1332 "SVLEN",
1334 header::TagType::Integer,
1335 header::TagLength::Variable,
1336 ),
1337 (
1338 "CIGAR",
1340 header::TagType::String,
1341 header::TagLength::AltAlleles,
1342 ),
1343 ];
1344 for (ref_name, ref_type, ref_length) in truth {
1345 let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1346 assert_eq!(tag_type, ref_type);
1347 assert_eq!(tag_length, ref_length);
1348 }
1349
1350 assert!(header.info_type(b"NOT_THERE").is_err());
1351 }
1352
1353 #[test]
1354 fn test_remove_alleles() {
1355 let mut bcf = Reader::from_path("test/test_multi.bcf").unwrap();
1356 for res in bcf.records() {
1357 let mut record = res.unwrap();
1358 if record.pos() == 10080 {
1359 record.remove_alleles(&[false, false, true]).unwrap();
1360 assert_eq!(record.alleles(), [b"A", b"C"]);
1361 }
1362 }
1363 }
1364
1365 fn read_all<P: AsRef<Path>>(path: P) -> String {
1367 let mut file = File::open(path.as_ref())
1368 .unwrap_or_else(|_| panic!("Unable to open the file: {:?}", path.as_ref()));
1369 let mut contents = String::new();
1370 file.read_to_string(&mut contents)
1371 .unwrap_or_else(|_| panic!("Unable to read the file: {:?}", path.as_ref()));
1372 contents
1373 }
1374
1375 #[test]
1379 fn test_write_various() {
1380 let tmp = tempfile::Builder::new()
1382 .prefix("rust-htslib")
1383 .tempdir()
1384 .expect("Cannot create temp dir");
1385 let out_path = tmp.path().join("test_various.out.vcf");
1386
1387 let vcf = Reader::from_path("test/test_various.vcf").expect("Error opening file.");
1388 {
1391 let mut writer = Writer::from_path(
1392 &out_path,
1393 &Header::from_template(vcf.header()),
1394 true,
1395 Format::Vcf,
1396 )
1397 .expect("Error opening file.");
1398
1399 let mut record = writer.empty_record();
1401
1402 record.set_rid(Some(0));
1403 assert_eq!(record.rid().unwrap(), 0);
1404
1405 record.set_pos(12);
1406 assert_eq!(record.pos(), 12);
1407
1408 assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1409 record.set_id(b"to_be_cleared").unwrap();
1410 assert_eq!(
1411 str::from_utf8(record.id().as_ref()).unwrap(),
1412 "to_be_cleared"
1413 );
1414 record.clear_id().unwrap();
1415 assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1416 record.set_id(b"first_id").unwrap();
1417 record.push_id(b"second_id").unwrap();
1418 record.push_id(b"first_id").unwrap();
1419
1420 assert!(record.filters().next().is_none());
1421 record.set_filters(&["q10".as_bytes()]).unwrap();
1422 record.push_filter("s50".as_bytes()).unwrap();
1423 record.remove_filter("q10".as_bytes(), true).unwrap();
1424 record.push_filter("q10".as_bytes()).unwrap();
1425
1426 record.set_alleles(&[b"C", b"T", b"G"]).unwrap();
1427
1428 record.set_qual(10.0);
1429
1430 record.push_info_integer(b"N1", &[32]).unwrap();
1431 record.push_info_float(b"F1", &[33.0]).unwrap();
1432 record.push_info_string(b"S1", &[b"fourtytwo"]).unwrap();
1433 record.push_info_flag(b"X1").unwrap();
1434
1435 record
1436 .push_genotypes(&[
1437 GenotypeAllele::Unphased(0),
1438 GenotypeAllele::Unphased(1),
1439 GenotypeAllele::Unphased(1),
1440 GenotypeAllele::Phased(1),
1441 ])
1442 .unwrap();
1443
1444 record
1445 .push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
1446 .unwrap();
1447 record.push_format_integer(b"FF1", &[43, 11]).unwrap();
1448 record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
1449 record
1450 .push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
1451 .unwrap();
1452
1453 writer.write(&record).unwrap();
1455 }
1456
1457 let expected = read_all("test/test_various.out.vcf");
1459 let actual = read_all(&out_path);
1460 assert_eq!(expected, actual);
1461 }
1462
1463 #[test]
1464 fn test_remove_headers() {
1465 let vcf = Reader::from_path("test/test_headers.vcf").expect("Error opening file.");
1466 let tmp = tempfile::Builder::new()
1467 .prefix("rust-htslib")
1468 .tempdir()
1469 .expect("Cannot create temp dir");
1470 let vcfpath = tmp.path().join("test.vcf");
1471 let mut header = Header::from_template(&vcf.header);
1472 header
1473 .remove_contig(b"contig2")
1474 .remove_info(b"INFO2")
1475 .remove_format(b"FORMAT2")
1476 .remove_filter(b"FILTER2")
1477 .remove_structured(b"Foo2")
1478 .remove_generic(b"Bar2");
1479 {
1480 let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::Vcf)
1481 .expect("Error opening output file.");
1482 }
1484
1485 let expected = read_all("test/test_headers.out.vcf");
1486 let actual = read_all(&vcfpath);
1487 assert_eq!(expected, actual);
1488 }
1489
1490 #[test]
1491 fn test_synced_reader() {
1492 let mut reader = synced::SyncedReader::new().unwrap();
1493 reader.set_require_index(true);
1494 reader.set_pairing(synced::pairing::SNPS);
1495
1496 assert_eq!(reader.reader_count(), 0);
1497 reader.add_reader("test/test_left.vcf.gz").unwrap();
1498 reader.add_reader("test/test_right.vcf.gz").unwrap();
1499 assert_eq!(reader.reader_count(), 2);
1500
1501 let res1 = reader.read_next();
1502 assert_eq!(res1.unwrap(), 2);
1503 assert!(reader.has_line(0));
1504 assert!(reader.has_line(1));
1505
1506 let res2 = reader.read_next();
1507 assert_eq!(res2.unwrap(), 1);
1508 assert!(reader.has_line(0));
1509 assert!(!reader.has_line(1));
1510
1511 let res3 = reader.read_next();
1512 assert_eq!(res3.unwrap(), 1);
1513 assert!(!reader.has_line(0));
1514 assert!(reader.has_line(1));
1515
1516 let res4 = reader.read_next();
1517 assert_eq!(res4.unwrap(), 0);
1518 }
1519
1520 #[test]
1521 fn test_synced_reader_fetch() {
1522 let mut reader = synced::SyncedReader::new().unwrap();
1523 reader.set_require_index(true);
1524 reader.set_pairing(synced::pairing::SNPS);
1525
1526 assert_eq!(reader.reader_count(), 0);
1527 reader.add_reader("test/test_left.vcf.gz").unwrap();
1528 reader.add_reader("test/test_right.vcf.gz").unwrap();
1529 assert_eq!(reader.reader_count(), 2);
1530
1531 reader.fetch(0, 0, 1000).unwrap();
1532 let res1 = reader.read_next();
1533 assert_eq!(res1.unwrap(), 2);
1534 assert!(reader.has_line(0));
1535 assert!(reader.has_line(1));
1536
1537 let res2 = reader.read_next();
1538 assert_eq!(res2.unwrap(), 1);
1539 assert!(reader.has_line(0));
1540 assert!(!reader.has_line(1));
1541
1542 let res3 = reader.read_next();
1543 assert_eq!(res3.unwrap(), 1);
1544 assert!(!reader.has_line(0));
1545 assert!(reader.has_line(1));
1546
1547 let res4 = reader.read_next();
1548 assert_eq!(res4.unwrap(), 0);
1549 }
1550
1551 #[test]
1552 fn test_svlen() {
1553 let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
1554
1555 let mut record = reader.empty_record();
1556 reader.read(&mut record).unwrap().unwrap();
1557
1558 assert_eq!(
1559 *record.info(b"SVLEN").integer().unwrap().unwrap(),
1560 &[-127][..]
1561 );
1562 }
1563
1564 #[test]
1565 fn test_fails_on_bam() {
1566 let reader = Reader::from_path("test/test.bam");
1567 assert!(reader.is_err());
1568 }
1569
1570 #[test]
1571 fn test_fails_on_non_existiant() {
1572 let reader = Reader::from_path("test/no_such_file");
1573 assert!(reader.is_err());
1574 }
1575
1576 #[test]
1577 fn test_multi_string_info_tag() {
1578 let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
1579 let mut rec = reader.empty_record();
1580 let _ = reader.read(&mut rec);
1581
1582 assert_eq!(
1583 rec.info_shared_buffer(b"ANN", Buffer::new())
1584 .string()
1585 .unwrap()
1586 .unwrap()
1587 .len(),
1588 14
1589 );
1590 }
1591
1592 #[test]
1593 fn test_multi_string_info_tag_number_a() {
1594 let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
1595 let mut rec = reader.empty_record();
1596 let _ = reader.read(&mut rec);
1597
1598 assert_eq!(
1599 rec.info_shared_buffer(b"X", Buffer::new())
1600 .string()
1601 .unwrap()
1602 .unwrap()
1603 .len(),
1604 2
1605 );
1606 }
1607
1608 #[test]
1609 fn test_genotype_allele_conversion() {
1610 let allele = GenotypeAllele::Unphased(1);
1611 let converted: i32 = allele.into();
1612 let expected = 4;
1613 assert_eq!(converted, expected);
1614 let reverse_conversion = GenotypeAllele::from(expected);
1615 assert_eq!(allele, reverse_conversion);
1616 }
1617
1618 #[test]
1619 fn test_genotype_missing_allele_conversion() {
1620 let allele = GenotypeAllele::PhasedMissing;
1621 let converted: i32 = allele.into();
1622 let expected = 1;
1623 assert_eq!(converted, expected);
1624 let reverse_conversion = GenotypeAllele::from(expected);
1625 assert_eq!(allele, reverse_conversion);
1626 }
1627
1628 #[test]
1629 fn test_alt_allele_dosage() {
1630 let path = &"test/test_string.vcf";
1631 let mut bcf = Reader::from_path(path).expect("Error opening file.");
1632 let _header = bcf.header();
1633 let first_record = bcf.records().next().unwrap().expect("Fail to read record");
1636 let sample_count = usize::try_from(first_record.sample_count()).unwrap();
1637 assert_eq!(sample_count, 2);
1638 let mut n_ref = vec![0; sample_count];
1639 let mut n_alt = vec![0; sample_count];
1640 let mut n_missing = vec![0; sample_count];
1641 let gts = first_record.genotypes().expect("Error reading genotypes");
1642 for sample_index in 0..sample_count {
1643 for gta in gts.get(sample_index).iter() {
1645 match gta.index() {
1647 Some(0) => n_ref[sample_index] += 1, Some(_) => n_alt[sample_index] += 1, None => n_missing[sample_index] += 1, }
1651 }
1652 }
1653 assert_eq!(n_ref, [0, 0]);
1654 assert_eq!(n_alt, [1, 2]);
1655 assert_eq!(n_missing, [1, 0]);
1656 }
1657
1658 #[test]
1659 fn test_obs_cornercase() {
1660 let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1661 let first_record = reader
1662 .records()
1663 .next()
1664 .unwrap()
1665 .expect("Fail to read record");
1666
1667 assert_eq!(
1668 *first_record.info(b"EVENT").string().unwrap().unwrap(),
1669 [b"gridss33fb_1085"]
1670 );
1671 assert_eq!(
1672 *first_record.info(b"MATEID").string().unwrap().unwrap(),
1673 [b"gridss33fb_1085h"]
1674 );
1675 }
1676
1677 #[test]
1678 fn test_trailing_omitted_format_fields() {
1679 let mut reader = Reader::from_path("test/test_trailing_omitted_format.vcf").unwrap();
1680 let first_record = reader
1681 .records()
1682 .next()
1683 .unwrap()
1684 .expect("Fail to read record");
1685
1686 let expected: Vec<&[u8]> = Vec::new();
1687 assert_eq!(*first_record.format(b"STR").string().unwrap(), expected,);
1688 assert_eq!(
1689 *first_record.format(b"INT").integer().unwrap(),
1690 vec![&[i32::missing()]],
1691 );
1692 assert!(first_record.format(b"FLT").float().unwrap()[0][0].is_nan(),);
1693 }
1694
1695 }