1use std::ffi;
107use std::path::Path;
108use std::str;
109use std::sync::Arc;
110
111use url::Url;
112
113pub mod buffer;
114pub mod header;
115pub mod index;
116pub mod record;
117
118use crate::bcf::header::{HeaderView, SampleSubset};
119use crate::errors::{Error, Result};
120use crate::htslib;
121
122pub use crate::bcf::header::{Header, HeaderRecord};
123pub use crate::bcf::record::Record;
124
125pub trait Read: Sized {
127 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>>;
136
137 fn records(&mut self) -> Records<'_, Self>;
139
140 fn header(&self) -> &HeaderView;
142
143 fn empty_record(&self) -> Record;
145
146 fn set_threads(&mut self, n_threads: usize) -> Result<()>;
156}
157
158#[derive(Debug)]
160pub struct Reader {
161 inner: *mut htslib::htsFile,
162 header: Arc<HeaderView>,
163}
164
165unsafe impl Send for Reader {}
166
167pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
171 assert!(n_threads > 0, "n_threads must be > 0");
172
173 let r = htslib::hts_set_threads(hts_file, n_threads as i32);
174 if r != 0 {
175 Err(Error::SetThreads)
176 } else {
177 Ok(())
178 }
179}
180
181impl Reader {
182 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
184 match path.as_ref().to_str() {
185 Some(p) if !path.as_ref().exists() => Err(Error::FileNotFound { path: p.into() }),
186 Some(p) => Self::new(p.as_bytes()),
187 _ => Err(Error::NonUnicodePath),
188 }
189 }
190
191 pub fn from_url(url: &Url) -> Result<Self> {
193 Self::new(url.as_str().as_bytes())
194 }
195
196 pub fn from_stdin() -> Result<Self> {
198 Self::new(b"-")
199 }
200
201 fn new(path: &[u8]) -> Result<Self> {
202 let htsfile = bcf_open(path, b"r")?;
203 let header = unsafe { htslib::bcf_hdr_read(htsfile) };
204 Ok(Reader {
205 inner: htsfile,
206 header: Arc::new(unsafe { HeaderView::from_ptr(header) }),
207 })
208 }
209}
210
211impl Read for Reader {
212 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
213 match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
214 0 => {
215 unsafe {
216 htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
218 }
219 record.set_header(Arc::clone(&self.header));
220 Some(Ok(()))
221 }
222 -1 => None,
223 _ => Some(Err(Error::BcfInvalidRecord)),
224 }
225 }
226
227 fn records(&mut self) -> Records<'_, Self> {
228 Records::new(self)
229 }
230
231 fn set_threads(&mut self, n_threads: usize) -> Result<()> {
232 unsafe { set_threads(self.inner, n_threads) }
233 }
234
235 fn header(&self) -> &HeaderView {
236 &self.header
237 }
238
239 fn empty_record(&self) -> Record {
241 Record::new(self.header.clone())
242 }
243}
244
245impl Drop for Reader {
246 fn drop(&mut self) {
247 unsafe {
248 htslib::hts_close(self.inner);
249 }
250 }
251}
252
253#[derive(Debug)]
255pub struct IndexedReader {
256 inner: *mut htslib::bcf_srs_t,
258 header: Arc<HeaderView>,
260
261 current_region: Option<(u32, u64, Option<u64>)>,
263}
264
265unsafe impl Send for IndexedReader {}
266
267impl IndexedReader {
268 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
274 let path = path.as_ref();
275 match path.to_str() {
276 Some(p) if path.exists() => {
277 Self::new(&ffi::CString::new(p).map_err(|_| Error::NonUnicodePath)?)
278 }
279 Some(p) => Err(Error::FileNotFound { path: p.into() }),
280 None => Err(Error::NonUnicodePath),
281 }
282 }
283
284 pub fn from_url(url: &Url) -> Result<Self> {
286 Self::new(&ffi::CString::new(url.as_str()).unwrap())
287 }
288
289 fn new(path: &ffi::CStr) -> Result<Self> {
295 let ser_reader = unsafe { htslib::bcf_sr_init() };
297 unsafe {
298 htslib::bcf_sr_set_opt(ser_reader, 0);
299 } if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
302 let header = Arc::new(unsafe {
303 HeaderView::from_ptr(htslib::bcf_hdr_dup(
304 (*(*ser_reader).readers.offset(0)).header,
305 ))
306 });
307 Ok(IndexedReader {
308 inner: ser_reader,
309 header,
310 current_region: None,
311 })
312 } else {
313 Err(Error::BcfOpen {
314 target: path.to_str().unwrap().to_owned(),
315 })
316 }
317 }
318
319 pub fn fetch(&mut self, rid: u32, start: u64, end: Option<u64>) -> Result<()> {
332 let contig = self.header.rid2name(rid)?;
333 let contig = ffi::CString::new(contig).unwrap();
334 if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
335 Err(Error::GenomicSeek {
336 contig: contig.to_str().unwrap().to_owned(),
337 start,
338 })
339 } else {
340 self.current_region = Some((rid, start, end));
341 Ok(())
342 }
343 }
344}
345
346impl Read for IndexedReader {
347 fn read(&mut self, record: &mut record::Record) -> Option<Result<()>> {
348 match unsafe { htslib::bcf_sr_next_line(self.inner) } {
349 0 => {
350 if unsafe { (*self.inner).errnum } != 0 {
351 Some(Err(Error::BcfInvalidRecord))
352 } else {
353 None
354 }
355 }
356 i => {
357 assert!(i > 0, "Must not be negative");
358 unsafe {
363 htslib::bcf_copy(
364 record.inner,
365 *(*(*self.inner).readers.offset(0)).buffer.offset(0),
366 );
367 }
368
369 unsafe {
370 htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
372 }
373
374 record.set_header(Arc::clone(&self.header));
375
376 match self.current_region {
377 Some((rid, _start, end)) => {
378 let endpos = end.unwrap_or(u64::MAX);
379 if Some(rid) == record.rid() && record.pos() as u64 <= endpos {
380 Some(Ok(()))
381 } else {
382 None
383 }
384 }
385 None => Some(Ok(())),
386 }
387 }
388 }
389 }
390
391 fn records(&mut self) -> Records<'_, Self> {
392 Records::new(self)
393 }
394
395 fn set_threads(&mut self, n_threads: usize) -> Result<()> {
396 assert!(n_threads > 0, "n_threads must be > 0");
397
398 let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
399 if r != 0 {
400 Err(Error::SetThreads)
401 } else {
402 Ok(())
403 }
404 }
405
406 fn header(&self) -> &HeaderView {
407 &self.header
408 }
409
410 fn empty_record(&self) -> Record {
411 Record::new(self.header.clone())
412 }
413}
414
415impl Drop for IndexedReader {
416 fn drop(&mut self) {
417 unsafe { htslib::bcf_sr_destroy(self.inner) };
418 }
419}
420
421pub mod synced {
423
424 use super::*;
425
426 pub mod pairing {
428 pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
430 pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
432 pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
434 pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
436 pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
438 pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
440 pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
442 pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
444 pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
446 }
447
448 #[derive(Debug)]
450 pub struct SyncedReader {
451 inner: *mut crate::htslib::bcf_srs_t,
453
454 headers: Vec<Arc<HeaderView>>,
456
457 current_region: Option<(u32, u64, u64)>,
459 }
460
461 impl SyncedReader {
463 pub fn new() -> Result<Self> {
464 let inner = unsafe { crate::htslib::bcf_sr_init() };
465 if inner.is_null() {
466 return Err(Error::BcfAllocationError);
467 }
468
469 Ok(SyncedReader {
470 inner,
471 headers: Vec::new(),
472 current_region: None,
473 })
474 }
475
476 pub fn set_require_index(&mut self, do_require: bool) {
478 unsafe {
479 (*self.inner).require_index = if do_require { 1 } else { 0 };
480 }
481 }
482
483 pub fn set_pairing(&mut self, bitmask: u32) {
485 unsafe {
486 crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
488 }
489 }
490
491 pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
493 match path.as_ref().to_str() {
494 Some(p) if path.as_ref().exists() => {
495 let p_cstring = ffi::CString::new(p).unwrap();
496 let res =
497 unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
498
499 if res == 0 {
500 return Err(Error::BcfOpen {
501 target: p.to_owned(),
502 });
503 }
504
505 let i = (self.reader_count() - 1) as isize;
506 let header = Arc::new(unsafe {
507 HeaderView::from_ptr(crate::htslib::bcf_hdr_dup(
508 (*(*self.inner).readers.offset(i)).header,
509 ))
510 });
511 self.headers.push(header);
512 Ok(())
513 }
514 _ => Err(Error::NonUnicodePath),
515 }
516 }
517
518 pub fn remove_reader(&mut self, idx: u32) {
520 if idx >= self.reader_count() {
521 panic!("Invalid reader!");
522 } else {
523 unsafe {
524 crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
525 }
526 self.headers.remove(idx as usize);
527 }
528 }
529
530 pub fn reader_count(&self) -> u32 {
532 unsafe { (*self.inner).nreaders as u32 }
533 }
534
535 pub fn read_next(&mut self) -> Result<u32> {
537 let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
538
539 if num == 0 {
540 if unsafe { (*self.inner).errnum } != 0 {
541 return Err(Error::BcfInvalidRecord);
542 }
543 Ok(0)
544 } else {
545 assert!(num > 0, "num returned by htslib must not be negative");
546 match self.current_region {
547 Some((rid, _start, end)) => {
548 for idx in 0..self.reader_count() {
549 if !self.has_line(idx) {
550 continue;
551 }
552 unsafe {
553 let record = *(*(*self.inner).readers.offset(idx as isize))
554 .buffer
555 .offset(0);
556 if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
557 return Ok(0);
558 }
559 }
560 }
561 Ok(num)
562 }
563 None => Ok(num),
564 }
565 }
566 }
567
568 pub fn has_line(&self, idx: u32) -> bool {
570 if idx >= self.reader_count() {
571 panic!("Invalid reader!");
572 } else {
573 unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
574 }
575 }
576
577 pub fn record(&self, idx: u32) -> Option<Record> {
579 if self.has_line(idx) {
580 let record = Record::new(self.headers[idx as usize].clone());
581 unsafe {
582 crate::htslib::bcf_copy(
583 record.inner,
584 *(*(*self.inner).readers.offset(idx as isize))
585 .buffer
586 .offset(0),
587 );
588 }
589 Some(record)
590 } else {
591 None
592 }
593 }
594
595 pub fn header(&self, idx: u32) -> &HeaderView {
597 if idx >= self.reader_count() {
599 panic!("Invalid reader!");
600 } else {
601 &self.headers[idx as usize]
602 }
603 }
604
605 pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
614 let contig = {
615 let contig = self.header(0).rid2name(rid).unwrap(); ffi::CString::new(contig).unwrap()
617 };
618 if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
619 Err(Error::GenomicSeek {
620 contig: contig.to_str().unwrap().to_owned(),
621 start,
622 })
623 } else {
624 self.current_region = Some((rid, start, end));
625 Ok(())
626 }
627 }
628 }
629
630 impl Drop for SyncedReader {
631 fn drop(&mut self) {
632 unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
633 }
634 }
635}
636
637#[derive(Clone, Copy, Debug, PartialEq, Eq)]
638pub enum Format {
639 Vcf,
640 Bcf,
641}
642
643#[derive(Debug)]
645pub struct Writer {
646 inner: *mut htslib::htsFile,
647 header: Arc<HeaderView>,
648 subset: Option<SampleSubset>,
649}
650
651unsafe impl Send for Writer {}
652
653impl Writer {
654 pub fn from_path<P: AsRef<Path>>(
663 path: P,
664 header: &Header,
665 uncompressed: bool,
666 format: Format,
667 ) -> Result<Self> {
668 if let Some(p) = path.as_ref().to_str() {
669 Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
670 } else {
671 Err(Error::NonUnicodePath)
672 }
673 }
674
675 pub fn from_url(
684 url: &Url,
685 header: &Header,
686 uncompressed: bool,
687 format: Format,
688 ) -> Result<Self> {
689 Self::new(url.as_str().as_bytes(), header, uncompressed, format)
690 }
691
692 pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
700 Self::new(b"-", header, uncompressed, format)
701 }
702
703 fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
704 let mode: &[u8] = match (uncompressed, format) {
705 (true, Format::Vcf) => b"w",
706 (false, Format::Vcf) => b"wz",
707 (true, Format::Bcf) => b"wbu",
708 (false, Format::Bcf) => b"wb",
709 };
710
711 let htsfile = bcf_open(path, mode)?;
712 unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
713 Ok(Writer {
714 inner: htsfile,
715 header: Arc::new(unsafe { HeaderView::from_ptr(htslib::bcf_hdr_dup(header.inner)) }),
716 subset: header.subset.clone(),
717 })
718 }
719
720 pub fn header(&self) -> &HeaderView {
722 &self.header
723 }
724
725 pub fn empty_record(&self) -> Record {
729 Record::new(self.header.clone())
730 }
731
732 pub fn translate(&mut self, record: &mut record::Record) {
738 unsafe {
739 htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
740 }
741 record.set_header(Arc::clone(&self.header));
742 }
743
744 pub fn subset(&mut self, record: &mut record::Record) {
750 if let Some(ref mut subset) = self.subset {
751 unsafe {
752 htslib::bcf_subset(
753 self.header.inner,
754 record.inner,
755 subset.len() as i32,
756 subset.as_mut_ptr(),
757 );
758 }
759 }
760 }
761
762 pub fn write(&mut self, record: &record::Record) -> Result<()> {
768 if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
769 Err(Error::WriteRecord)
770 } else {
771 Ok(())
772 }
773 }
774
775 pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
782 unsafe { set_threads(self.inner, n_threads) }
783 }
784}
785
786impl Drop for Writer {
787 fn drop(&mut self) {
788 unsafe {
789 htslib::hts_close(self.inner);
790 }
791 }
792}
793
794#[derive(Debug)]
795pub struct Records<'a, R: Read> {
796 reader: &'a mut R,
797}
798
799impl<'a, R: Read> Records<'a, R> {
800 pub fn new(reader: &'a mut R) -> Self {
801 Self { reader }
802 }
803}
804
805impl<R: Read> Iterator for Records<'_, R> {
806 type Item = Result<record::Record>;
807
808 fn next(&mut self) -> Option<Result<record::Record>> {
809 let mut record = self.reader.empty_record();
810 match self.reader.read(&mut record) {
811 Some(Err(e)) => Some(Err(e)),
812 Some(Ok(_)) => Some(Ok(record)),
813 None => None,
814 }
815 }
816}
817
818fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
820 let p = ffi::CString::new(target).unwrap();
821 let c_str = ffi::CString::new(mode).unwrap();
822 let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
823
824 if ret.is_null() {
825 return Err(Error::BcfOpen {
826 target: str::from_utf8(target).unwrap().to_owned(),
827 });
828 }
829
830 unsafe {
831 if !(mode.contains(&b'w')
832 || (*ret).format.category == htslib::htsFormatCategory_variant_data)
833 {
834 return Err(Error::BcfOpen {
835 target: str::from_utf8(target).unwrap().to_owned(),
836 });
837 }
838 }
839 Ok(ret)
840}
841
842#[cfg(test)]
843mod tests {
844 use tempfile::NamedTempFile;
845
846 use super::record::Buffer;
847 use super::*;
848 use crate::bcf::header::Id;
849 use crate::bcf::record::GenotypeAllele;
850 use crate::bcf::record::Numeric;
851 use crate::bcf::Reader;
852 use std::convert::TryFrom;
853 use std::fs::File;
854 use std::io::prelude::Read as IoRead;
855 use std::path::Path;
856 use std::str;
857
858 fn _test_read<P: AsRef<Path>>(path: &P) {
859 let mut bcf = Reader::from_path(path).expect("Error opening file.");
860 assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
861
862 for (i, rec) in bcf.records().enumerate() {
863 let record = rec.expect("Error reading record.");
864 assert_eq!(record.sample_count(), 1);
865
866 assert_eq!(record.rid().expect("Error reading rid."), 0);
867 assert_eq!(record.pos(), 10021 + i as i64);
868 assert!((record.qual() - 0f32).abs() < f32::EPSILON);
869 let mut buffer = Buffer::new();
870 assert!(
871 (record
872 .info_shared_buffer(b"MQ0F", &mut buffer)
873 .float()
874 .expect("Error reading info.")
875 .expect("Missing tag")[0]
876 - 1.0)
877 .abs()
878 < f32::EPSILON
879 );
880 if i == 59 {
881 assert!(
882 (record
883 .info_shared_buffer(b"SGB", &mut buffer)
884 .float()
885 .expect("Error reading info.")
886 .expect("Missing tag")[0]
887 - -0.379885)
888 .abs()
889 < f32::EPSILON
890 );
891 }
892 assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
894
895 let fmt = record.format(b"PL");
896 let pl = fmt.integer().expect("Error reading format.");
897 assert_eq!(pl.len(), 1);
898 if i == 59 {
899 assert_eq!(pl[0].len(), 6);
900 } else {
901 assert_eq!(pl[0].len(), 3);
902 }
903 }
904 }
905
906 #[test]
907 fn test_read() {
908 _test_read(&"test/test.bcf");
909 }
910
911 #[test]
912 fn test_reader_set_threads() {
913 let path = &"test/test.bcf";
914 let mut bcf = Reader::from_path(path).expect("Error opening file.");
915 bcf.set_threads(2).unwrap();
916 }
917
918 #[test]
919 fn test_writer_set_threads() {
920 let path = &"test/test.bcf";
921 let tmp = tempfile::Builder::new()
922 .prefix("rust-htslib")
923 .tempdir()
924 .expect("Cannot create temp dir");
925 let bcfpath = tmp.path().join("test.bcf");
926 let bcf = Reader::from_path(path).expect("Error opening file.");
927 let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
928 .expect("Error subsetting samples.");
929 let mut writer =
930 Writer::from_path(&bcfpath, &header, false, Format::Bcf).expect("Error opening file.");
931 writer.set_threads(2).unwrap();
932 }
933
934 #[test]
935 fn test_fetch() {
936 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
937 bcf.set_threads(2).unwrap();
938 let rid = bcf
939 .header()
940 .name2rid(b"1")
941 .expect("Translating from contig '1' to ID failed.");
942 bcf.fetch(rid, 10_033, Some(10_060))
943 .expect("Fetching failed");
944 assert_eq!(bcf.records().count(), 28);
945 }
946
947 #[test]
948 fn test_fetch_all() {
949 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
950 bcf.set_threads(2).unwrap();
951 let rid = bcf
952 .header()
953 .name2rid(b"1")
954 .expect("Translating from contig '1' to ID failed.");
955 bcf.fetch(rid, 0, None).expect("Fetching failed");
956 assert_eq!(bcf.records().count(), 62);
957 }
958
959 #[test]
960 fn test_fetch_open_ended() {
961 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
962 bcf.set_threads(2).unwrap();
963 let rid = bcf
964 .header()
965 .name2rid(b"1")
966 .expect("Translating from contig '1' to ID failed.");
967 bcf.fetch(rid, 10077, None).expect("Fetching failed");
968 assert_eq!(bcf.records().count(), 6);
969 }
970
971 #[test]
972 fn test_fetch_start_greater_than_last_vcf_pos() {
973 let mut bcf = IndexedReader::from_path("test/test.bcf").expect("Error opening file.");
974 bcf.set_threads(2).unwrap();
975 let rid = bcf
976 .header()
977 .name2rid(b"1")
978 .expect("Translating from contig '1' to ID failed.");
979 bcf.fetch(rid, 20077, None).expect("Fetching failed");
980 assert_eq!(bcf.records().count(), 0);
981 }
982
983 #[test]
984 fn test_write() {
985 let mut bcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
986 let tmp = tempfile::Builder::new()
987 .prefix("rust-htslib")
988 .tempdir()
989 .expect("Cannot create temp dir");
990 let bcfpath = tmp.path().join("test.bcf");
991 println!("{:?}", bcfpath);
992 {
993 let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
994 .expect("Error subsetting samples.");
995 let mut writer = Writer::from_path(&bcfpath, &header, false, Format::Bcf)
996 .expect("Error opening file.");
997 for rec in bcf.records() {
998 let mut record = rec.expect("Error reading record.");
999 writer.translate(&mut record);
1000 writer.subset(&mut record);
1001 record.trim_alleles().expect("Error trimming alleles.");
1002 writer.write(&record).expect("Error writing record");
1003 }
1004 }
1005 {
1006 _test_read(&bcfpath);
1007 }
1008 tmp.close().expect("Failed to delete temp dir");
1009 }
1010
1011 #[test]
1012 fn test_strings() {
1013 let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1014 let fs1 = [
1015 &b"LongString1"[..],
1016 &b"LongString2"[..],
1017 &b"."[..],
1018 &b"LongString4"[..],
1019 &b"evenlength"[..],
1020 &b"ss6"[..],
1021 ];
1022 let mut buffer = Buffer::new();
1023 for (i, rec) in vcf.records().enumerate() {
1024 println!("record {}", i);
1025 let record = rec.expect("Error reading record.");
1026 assert_eq!(
1027 record
1028 .info_shared_buffer(b"S1", &mut buffer)
1029 .string()
1030 .expect("Error reading string.")
1031 .expect("Missing tag")[0],
1032 format!("string{}", i + 1).as_bytes()
1033 );
1034 let fs1_str_vec = record
1035 .format_shared_buffer(b"FS1", &mut buffer)
1036 .string()
1037 .expect("Error reading string.");
1038 assert_eq!(fs1_str_vec.len(), 2);
1039 println!("{}", String::from_utf8_lossy(fs1_str_vec[0]));
1040 assert_eq!(
1041 record
1042 .format(b"FS1")
1043 .string()
1044 .expect("Error reading string.")[0],
1045 fs1[i]
1046 );
1047 }
1048 }
1049
1050 #[test]
1051 fn test_missing() {
1052 let mut vcf = Reader::from_path("test/test_missing.vcf").expect("Error opening file.");
1053 let fn4 = [
1054 &[
1055 i32::missing(),
1056 i32::missing(),
1057 i32::missing(),
1058 i32::missing(),
1059 ][..],
1060 &[i32::missing()][..],
1061 ];
1062 let f1 = [false, true];
1063 let mut buffer = Buffer::new();
1064 for (i, rec) in vcf.records().enumerate() {
1065 let record = rec.expect("Error reading record.");
1066 assert_eq!(
1067 record
1068 .info_shared_buffer(b"F1", &mut buffer)
1069 .float()
1070 .expect("Error reading float.")
1071 .expect("Missing tag")[0]
1072 .is_nan(),
1073 f1[i]
1074 );
1075 assert_eq!(
1076 record
1077 .format(b"FN4")
1078 .integer()
1079 .expect("Error reading integer.")[1],
1080 fn4[i]
1081 );
1082 assert!(
1083 record.format(b"FF4").float().expect("Error reading float.")[1]
1084 .iter()
1085 .all(|&v| v.is_missing())
1086 );
1087 }
1088 }
1089
1090 #[test]
1091 fn test_genotypes() {
1092 let mut vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1093 let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
1094 for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
1095 let rec = rec.expect("Error reading record.");
1096 let genotypes = rec.genotypes().expect("Error reading genotypes");
1097 assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
1098 }
1099 }
1100
1101 #[test]
1102 fn test_genotypes_read_mixed_ploidy() {
1103 let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1104
1105 let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1107
1108 for (rec, exp_gts) in vcf.records().zip(expected.iter()) {
1109 let rec = rec.expect("Error reading record.");
1110
1111 let genotypes = rec.genotypes().expect("Error reading genotypes");
1113
1114 for (sample, exp_gt) in exp_gts.iter().enumerate() {
1116 assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1117 }
1118 }
1119 }
1120
1121 #[test]
1122 fn test_genotypes_write_and_read_mixed_ploidy() {
1123 let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1124
1125 let tmp = NamedTempFile::new().unwrap();
1127 let path = tmp.path();
1128
1129 {
1130 let mut writer = Writer::from_path(
1132 path,
1133 &Header::from_template(vcf.header()),
1134 true,
1135 Format::Vcf,
1136 )
1137 .unwrap();
1138
1139 let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1141 rec_tpl
1142 .push_genotype_structured(
1143 &[
1144 vec![GenotypeAllele::Unphased(0)],
1145 vec![GenotypeAllele::Unphased(1)],
1146 ],
1147 3,
1148 )
1149 .unwrap();
1150 writer.write(&rec_tpl).unwrap();
1151 rec_tpl
1152 .push_genotype_structured(
1153 &[
1154 vec![GenotypeAllele::Unphased(0), GenotypeAllele::Unphased(1)],
1155 vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
1156 ],
1157 3,
1158 )
1159 .unwrap();
1160 writer.write(&rec_tpl).unwrap();
1161 rec_tpl
1162 .push_genotype_structured(
1163 &[
1164 vec![GenotypeAllele::Unphased(1), GenotypeAllele::Phased(0)],
1165 vec![
1166 GenotypeAllele::Unphased(1),
1167 GenotypeAllele::Unphased(1),
1168 GenotypeAllele::Phased(0),
1169 ],
1170 ],
1171 3,
1172 )
1173 .unwrap();
1174 writer.write(&rec_tpl).unwrap();
1175 }
1176
1177 let mut reader = Reader::from_path(path).unwrap();
1179
1180 let expected = [vec!["0", "1"], vec!["0/1", "1/1"], vec!["1|0", "1/1|0"]];
1182
1183 for (rec, exp_gts) in reader.records().zip(expected.iter()) {
1185 let rec = rec.expect("Error reading record");
1186 let genotypes = rec.genotypes().expect("Error reading genotypes");
1187
1188 for (sample, exp_gt) in exp_gts.iter().enumerate() {
1190 assert_eq!(&format!("{}", genotypes.get(sample)), exp_gt);
1191 }
1192 }
1193 }
1194
1195 #[test]
1196 fn test_genotypes_wrong_max_ploidy() {
1197 let mut vcf = Reader::from_path("test/test_non_diploid.vcf").expect("Error opening file.");
1198
1199 let mut rec_tpl = vcf.records().next().unwrap().unwrap();
1201 let err = rec_tpl
1202 .push_genotype_structured(
1203 &[
1204 vec![
1205 GenotypeAllele::Unphased(0),
1206 GenotypeAllele::Unphased(1),
1207 GenotypeAllele::Unphased(0),
1208 ],
1209 vec![
1210 GenotypeAllele::Unphased(1),
1211 GenotypeAllele::Unphased(0),
1212 GenotypeAllele::Unphased(1),
1213 GenotypeAllele::Unphased(0),
1214 ],
1215 ],
1216 3,
1217 )
1218 .expect_err(
1219 "This should fail since there are more alleles specified (4 for second sample) than max_ploidy (3) suggests",
1220 );
1221 assert_eq!(err, crate::errors::Error::BcfSetValues);
1222 }
1223
1224 #[test]
1225 fn test_header_ids() {
1226 let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1227 let header = &vcf.header();
1228 use crate::bcf::header::Id;
1229
1230 assert_eq!(header.id_to_name(Id(4)), b"GT");
1231 assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
1232 assert!(header.name_to_id(b"XX").is_err());
1233 }
1234
1235 #[test]
1236 fn test_header_samples() {
1237 let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1238 let header = &vcf.header();
1239
1240 assert_eq!(header.id_to_sample(Id(0)), b"one");
1241 assert_eq!(header.id_to_sample(Id(1)), b"two");
1242 assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
1243 assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
1244 assert!(header.sample_to_id(b"three").is_err());
1245 }
1246
1247 #[test]
1248 fn test_header_contigs() {
1249 let vcf = Reader::from_path("test/test_multi.bcf").expect("Error opening file.");
1250 let header = &vcf.header();
1251
1252 assert_eq!(header.contig_count(), 86);
1253
1254 assert_eq!(header.rid2name(0).unwrap(), b"1");
1256 assert_eq!(header.name2rid(b"1").unwrap(), 0);
1257
1258 assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
1259 assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
1260
1261 assert!(header.name2rid(b"nonexistent_contig").is_err());
1263 assert!(header.rid2name(100).is_err());
1264 }
1265
1266 #[test]
1267 fn test_header_records() {
1268 let vcf = Reader::from_path("test/test_string.vcf").expect("Error opening file.");
1269 let records = vcf.header().header_records();
1270 assert_eq!(records.len(), 10);
1271
1272 match records[1] {
1273 HeaderRecord::Filter {
1274 ref key,
1275 ref values,
1276 } => {
1277 assert_eq!(key, "FILTER");
1278 assert_eq!(values["ID"], "PASS");
1279 }
1280 _ => {
1281 panic!("Invalid HeaderRecord");
1282 }
1283 }
1284 }
1285
1286 #[test]
1287 fn test_header_info_types() {
1288 let vcf = Reader::from_path("test/test.bcf").unwrap();
1289 let header = vcf.header();
1290 let truth = vec![
1291 (
1292 "INDEL",
1294 header::TagType::Flag,
1295 header::TagLength::Fixed(0),
1296 ),
1297 (
1298 "DP",
1300 header::TagType::Integer,
1301 header::TagLength::Fixed(1),
1302 ),
1303 (
1304 "QS",
1306 header::TagType::Float,
1307 header::TagLength::Alleles,
1308 ),
1309 (
1310 "I16",
1312 header::TagType::Float,
1313 header::TagLength::Fixed(16),
1314 ),
1315 ];
1316 for (ref_name, ref_type, ref_length) in truth {
1317 let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1318 assert_eq!(tag_type, ref_type);
1319 assert_eq!(tag_length, ref_length);
1320 }
1321
1322 let vcf = Reader::from_path("test/test_svlen.vcf").unwrap();
1323 let header = vcf.header();
1324 let truth = vec![
1325 (
1326 "IMPRECISE",
1328 header::TagType::Flag,
1329 header::TagLength::Fixed(0),
1330 ),
1331 (
1332 "SVTYPE",
1334 header::TagType::String,
1335 header::TagLength::Fixed(1),
1336 ),
1337 (
1338 "SVLEN",
1340 header::TagType::Integer,
1341 header::TagLength::Variable,
1342 ),
1343 (
1344 "CIGAR",
1346 header::TagType::String,
1347 header::TagLength::AltAlleles,
1348 ),
1349 ];
1350 for (ref_name, ref_type, ref_length) in truth {
1351 let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
1352 assert_eq!(tag_type, ref_type);
1353 assert_eq!(tag_length, ref_length);
1354 }
1355
1356 assert!(header.info_type(b"NOT_THERE").is_err());
1357 }
1358
1359 #[test]
1360 fn test_remove_alleles() {
1361 let mut bcf = Reader::from_path("test/test_multi.bcf").unwrap();
1362 for res in bcf.records() {
1363 let mut record = res.unwrap();
1364 if record.pos() == 10080 {
1365 record.remove_alleles(&[false, false, true]).unwrap();
1366 assert_eq!(record.alleles(), [b"A", b"C"]);
1367 }
1368 }
1369 }
1370
1371 fn read_all<P: AsRef<Path>>(path: P) -> String {
1373 let mut file = File::open(path.as_ref())
1374 .unwrap_or_else(|_| panic!("Unable to open the file: {:?}", path.as_ref()));
1375 let mut contents = String::new();
1376 file.read_to_string(&mut contents)
1377 .unwrap_or_else(|_| panic!("Unable to read the file: {:?}", path.as_ref()));
1378 contents
1379 }
1380
1381 #[test]
1385 fn test_write_various() {
1386 let tmp = tempfile::Builder::new()
1388 .prefix("rust-htslib")
1389 .tempdir()
1390 .expect("Cannot create temp dir");
1391 let out_path = tmp.path().join("test_various.out.vcf");
1392
1393 let vcf = Reader::from_path("test/test_various.vcf").expect("Error opening file.");
1394 {
1397 let mut writer = Writer::from_path(
1398 &out_path,
1399 &Header::from_template(vcf.header()),
1400 true,
1401 Format::Vcf,
1402 )
1403 .expect("Error opening file.");
1404
1405 let mut record = writer.empty_record();
1407
1408 record.set_rid(Some(0));
1409 assert_eq!(record.rid().unwrap(), 0);
1410
1411 record.set_pos(12);
1412 assert_eq!(record.pos(), 12);
1413
1414 assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1415 record.set_id(b"to_be_cleared").unwrap();
1416 assert_eq!(
1417 str::from_utf8(record.id().as_ref()).unwrap(),
1418 "to_be_cleared"
1419 );
1420 record.clear_id().unwrap();
1421 assert_eq!(str::from_utf8(record.id().as_ref()).unwrap(), ".");
1422 record.set_id(b"first_id").unwrap();
1423 record.push_id(b"second_id").unwrap();
1424 record.push_id(b"first_id").unwrap();
1425
1426 assert!(record.filters().next().is_none());
1427 record.set_filters(&["q10".as_bytes()]).unwrap();
1428 record.push_filter("s50".as_bytes()).unwrap();
1429 record.remove_filter("q10".as_bytes(), true).unwrap();
1430 record.push_filter("q10".as_bytes()).unwrap();
1431
1432 record.set_alleles(&[b"C", b"T", b"G"]).unwrap();
1433
1434 record.set_qual(10.0);
1435
1436 record.push_info_integer(b"N1", &[32]).unwrap();
1437 record.push_info_float(b"F1", &[33.0]).unwrap();
1438 record.push_info_string(b"S1", &[b"fourtytwo"]).unwrap();
1439 record.push_info_flag(b"X1").unwrap();
1440
1441 record
1442 .push_genotypes(&[
1443 GenotypeAllele::Unphased(0),
1444 GenotypeAllele::Unphased(1),
1445 GenotypeAllele::Unphased(1),
1446 GenotypeAllele::Phased(1),
1447 ])
1448 .unwrap();
1449
1450 record
1451 .push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
1452 .unwrap();
1453 record.push_format_integer(b"FF1", &[43, 11]).unwrap();
1454 record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
1455 record
1456 .push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
1457 .unwrap();
1458
1459 writer.write(&record).unwrap();
1461 }
1462
1463 let expected = read_all("test/test_various.out.vcf");
1465 let actual = read_all(&out_path);
1466 assert_eq!(expected, actual);
1467 }
1468
1469 #[test]
1470 fn test_remove_headers() {
1471 let vcf = Reader::from_path("test/test_headers.vcf").expect("Error opening file.");
1472 let tmp = tempfile::Builder::new()
1473 .prefix("rust-htslib")
1474 .tempdir()
1475 .expect("Cannot create temp dir");
1476 let vcfpath = tmp.path().join("test.vcf");
1477 let mut header = Header::from_template(&vcf.header);
1478 header
1479 .remove_contig(b"contig2")
1480 .remove_info(b"INFO2")
1481 .remove_format(b"FORMAT2")
1482 .remove_filter(b"FILTER2")
1483 .remove_structured(b"Foo2")
1484 .remove_generic(b"Bar2");
1485 {
1486 let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::Vcf)
1487 .expect("Error opening output file.");
1488 }
1490
1491 let expected = read_all("test/test_headers.out.vcf");
1492 let actual = read_all(&vcfpath);
1493 assert_eq!(expected, actual);
1494 }
1495
1496 #[test]
1497 fn test_synced_reader() {
1498 let mut reader = synced::SyncedReader::new().unwrap();
1499 reader.set_require_index(true);
1500 reader.set_pairing(synced::pairing::SNPS);
1501
1502 assert_eq!(reader.reader_count(), 0);
1503 reader.add_reader("test/test_left.vcf.gz").unwrap();
1504 reader.add_reader("test/test_right.vcf.gz").unwrap();
1505 assert_eq!(reader.reader_count(), 2);
1506
1507 let res1 = reader.read_next();
1508 assert_eq!(res1.unwrap(), 2);
1509 assert!(reader.has_line(0));
1510 assert!(reader.has_line(1));
1511
1512 let res2 = reader.read_next();
1513 assert_eq!(res2.unwrap(), 1);
1514 assert!(reader.has_line(0));
1515 assert!(!reader.has_line(1));
1516
1517 let res3 = reader.read_next();
1518 assert_eq!(res3.unwrap(), 1);
1519 assert!(!reader.has_line(0));
1520 assert!(reader.has_line(1));
1521
1522 let res4 = reader.read_next();
1523 assert_eq!(res4.unwrap(), 0);
1524 }
1525
1526 #[test]
1527 fn test_synced_reader_fetch() {
1528 let mut reader = synced::SyncedReader::new().unwrap();
1529 reader.set_require_index(true);
1530 reader.set_pairing(synced::pairing::SNPS);
1531
1532 assert_eq!(reader.reader_count(), 0);
1533 reader.add_reader("test/test_left.vcf.gz").unwrap();
1534 reader.add_reader("test/test_right.vcf.gz").unwrap();
1535 assert_eq!(reader.reader_count(), 2);
1536
1537 reader.fetch(0, 0, 1000).unwrap();
1538 let res1 = reader.read_next();
1539 assert_eq!(res1.unwrap(), 2);
1540 assert!(reader.has_line(0));
1541 assert!(reader.has_line(1));
1542
1543 let res2 = reader.read_next();
1544 assert_eq!(res2.unwrap(), 1);
1545 assert!(reader.has_line(0));
1546 assert!(!reader.has_line(1));
1547
1548 let res3 = reader.read_next();
1549 assert_eq!(res3.unwrap(), 1);
1550 assert!(!reader.has_line(0));
1551 assert!(reader.has_line(1));
1552
1553 let res4 = reader.read_next();
1554 assert_eq!(res4.unwrap(), 0);
1555 }
1556
1557 #[test]
1558 fn test_svlen() {
1559 let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
1560
1561 let mut record = reader.empty_record();
1562 reader.read(&mut record).unwrap().unwrap();
1563
1564 assert_eq!(
1565 *record.info(b"SVLEN").integer().unwrap().unwrap(),
1566 &[-127][..]
1567 );
1568 }
1569
1570 #[test]
1571 fn test_fails_on_bam() {
1572 let reader = Reader::from_path("test/test.bam");
1573 assert!(reader.is_err());
1574 }
1575
1576 #[test]
1577 fn test_fails_on_non_existiant() {
1578 let reader = Reader::from_path("test/no_such_file");
1579 assert!(reader.is_err());
1580 }
1581
1582 #[test]
1583 fn test_multi_string_info_tag() {
1584 let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
1585 let mut rec = reader.empty_record();
1586 let _ = reader.read(&mut rec);
1587
1588 assert_eq!(
1589 rec.info_shared_buffer(b"ANN", Buffer::new())
1590 .string()
1591 .unwrap()
1592 .unwrap()
1593 .len(),
1594 14
1595 );
1596 }
1597
1598 #[test]
1599 fn test_multi_string_info_tag_number_a() {
1600 let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
1601 let mut rec = reader.empty_record();
1602 let _ = reader.read(&mut rec);
1603
1604 assert_eq!(
1605 rec.info_shared_buffer(b"X", Buffer::new())
1606 .string()
1607 .unwrap()
1608 .unwrap()
1609 .len(),
1610 2
1611 );
1612 }
1613
1614 #[test]
1615 fn test_genotype_allele_conversion() {
1616 let allele = GenotypeAllele::Unphased(1);
1617 let converted: i32 = allele.into();
1618 let expected = 4;
1619 assert_eq!(converted, expected);
1620 let reverse_conversion = GenotypeAllele::from(expected);
1621 assert_eq!(allele, reverse_conversion);
1622 }
1623
1624 #[test]
1625 fn test_genotype_missing_allele_conversion() {
1626 let allele = GenotypeAllele::PhasedMissing;
1627 let converted: i32 = allele.into();
1628 let expected = 1;
1629 assert_eq!(converted, expected);
1630 let reverse_conversion = GenotypeAllele::from(expected);
1631 assert_eq!(allele, reverse_conversion);
1632 }
1633
1634 #[test]
1635 fn test_alt_allele_dosage() {
1636 let path = &"test/test_string.vcf";
1637 let mut bcf = Reader::from_path(path).expect("Error opening file.");
1638 let _header = bcf.header();
1639 let first_record = bcf.records().next().unwrap().expect("Fail to read record");
1642 let sample_count = usize::try_from(first_record.sample_count()).unwrap();
1643 assert_eq!(sample_count, 2);
1644 let mut n_ref = vec![0; sample_count];
1645 let mut n_alt = vec![0; sample_count];
1646 let mut n_missing = vec![0; sample_count];
1647 let gts = first_record.genotypes().expect("Error reading genotypes");
1648 for sample_index in 0..sample_count {
1649 for gta in gts.get(sample_index).iter() {
1651 match gta.index() {
1653 Some(0) => n_ref[sample_index] += 1, Some(_) => n_alt[sample_index] += 1, None => n_missing[sample_index] += 1, }
1657 }
1658 }
1659 assert_eq!(n_ref, [0, 0]);
1660 assert_eq!(n_alt, [1, 2]);
1661 assert_eq!(n_missing, [1, 0]);
1662 }
1663
1664 #[test]
1665 fn test_obs_cornercase() {
1666 let mut reader = Reader::from_path("test/obs-cornercase.vcf").unwrap();
1667 let first_record = reader
1668 .records()
1669 .next()
1670 .unwrap()
1671 .expect("Fail to read record");
1672
1673 assert_eq!(
1674 *first_record.info(b"EVENT").string().unwrap().unwrap(),
1675 [b"gridss33fb_1085"]
1676 );
1677 assert_eq!(
1678 *first_record.info(b"MATEID").string().unwrap().unwrap(),
1679 [b"gridss33fb_1085h"]
1680 );
1681 }
1682
1683 #[test]
1684 fn test_trailing_omitted_format_fields() {
1685 let mut reader = Reader::from_path("test/test_trailing_omitted_format.vcf").unwrap();
1686 let first_record = reader
1687 .records()
1688 .next()
1689 .unwrap()
1690 .expect("Fail to read record");
1691
1692 let expected: Vec<&[u8]> = Vec::new();
1693 assert_eq!(*first_record.format(b"STR").string().unwrap(), expected,);
1694 assert_eq!(
1695 *first_record.format(b"INT").integer().unwrap(),
1696 vec![&[i32::missing()]],
1697 );
1698 assert!(first_record.format(b"FLT").float().unwrap()[0][0].is_nan(),);
1699 }
1700
1701 }