rust_htslib/bcf/record.rs
1// Copyright 2014 Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::{Borrow, BorrowMut};
7use std::fmt;
8use std::marker::PhantomData;
9use std::ops::Deref;
10use std::os::raw::c_char;
11use std::ptr;
12use std::rc::Rc;
13use std::slice;
14use std::str;
15use std::{ffi, iter};
16
17use bio_types::genome;
18use derive_new::new;
19use ieee754::Ieee754;
20use lazy_static::lazy_static;
21
22use crate::bcf::header::{HeaderView, Id};
23use crate::bcf::Error;
24use crate::errors::Result;
25use crate::htslib;
26
// Sentinel stored in an integer field to mark a missing value.
const MISSING_INTEGER: i32 = i32::MIN;
// Sentinel stored in an integer vector to mark that the vector ends early;
// used as padding when a sample has fewer values than the others.
const VECTOR_END_INTEGER: i32 = i32::MIN + 1;

lazy_static! {
    // Float sentinels are fixed NaN bit patterns. NaN never compares equal with `==`,
    // so code that checks for them compares the raw bits instead of the float values.
    static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F80_0001);
    static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F80_0002);
}
34
35/// Common methods for numeric INFO and FORMAT entries
pub trait Numeric {
    /// Return true if this entry equals the sentinel that marks a missing value.
    fn is_missing(&self) -> bool;

    /// Return the sentinel that represents a missing value of this type,
    /// for storage in a BCF record.
    fn missing() -> Self;
}
43
44impl Numeric for f32 {
45 fn is_missing(&self) -> bool {
46 self.bits() == MISSING_FLOAT.bits()
47 }
48
49 fn missing() -> f32 {
50 *MISSING_FLOAT
51 }
52}
53
54impl Numeric for i32 {
55 fn is_missing(&self) -> bool {
56 *self == MISSING_INTEGER
57 }
58
59 fn missing() -> i32 {
60 MISSING_INTEGER
61 }
62}
63
trait NumericUtils {
    /// Return true if this entry equals the vector-end sentinel, i.e. it marks the
    /// premature end of a per-sample vector rather than a real value.
    fn is_vector_end(&self) -> bool;
}
68
69impl NumericUtils for f32 {
70 fn is_vector_end(&self) -> bool {
71 self.bits() == VECTOR_END_FLOAT.bits()
72 }
73}
74
75impl NumericUtils for i32 {
76 fn is_vector_end(&self) -> bool {
77 *self == VECTOR_END_INTEGER
78 }
79}
80
81/// A trait to allow for seamless use of bytes or integer identifiers for filters
pub trait FilterId {
    /// Resolve this identifier to a numeric header `Id`, consulting `header`
    /// when the identifier is given by name.
    fn id_from_header(&self, header: &HeaderView) -> Result<Id>;
    /// Return true if this identifier denotes the `PASS` filter.
    fn is_pass(&self) -> bool;
}
86
87impl FilterId for [u8] {
88 fn id_from_header(&self, header: &HeaderView) -> Result<Id> {
89 header.name_to_id(self)
90 }
91 fn is_pass(&self) -> bool {
92 matches!(self, b"PASS" | b".")
93 }
94}
95
96impl FilterId for Id {
97 fn id_from_header(&self, _header: &HeaderView) -> Result<Id> {
98 Ok(*self)
99 }
100 fn is_pass(&self) -> bool {
101 *self == Id(0)
102 }
103}
104
105/// A buffer for info or format data.
#[derive(Debug)]
pub struct Buffer {
    // Raw allocation; starts out null and is released with `libc::free` on drop.
    inner: *mut ::std::os::raw::c_void,
    // Starts at 0. NOTE(review): presumably the size htslib reports for `inner`;
    // confirm against the code that fills the buffer.
    len: i32,
}
111
112impl Buffer {
113 pub fn new() -> Self {
114 Buffer {
115 inner: ptr::null_mut(),
116 len: 0,
117 }
118 }
119}
120
121impl Default for Buffer {
122 fn default() -> Self {
123 Self::new()
124 }
125}
126
127impl Drop for Buffer {
128 fn drop(&mut self) {
129 unsafe {
130 ::libc::free(self.inner);
131 }
132 }
133}
134
/// A value bundled with the buffer it is stored next to.
///
/// The wrapper dereferences to `value`; `_buffer` is never read and is only
/// carried along so that it lives at least as long as the value.
#[derive(new, Debug)]
pub struct BufferBacked<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> {
    // The value handed out through `Deref` and `Display`.
    value: T,
    // Held only to keep the buffer alive alongside `value`.
    _buffer: B,
    // Ties the wrapper to lifetime `'a` without storing a reference.
    #[new(default)]
    phantom: PhantomData<&'a B>,
}
142
143impl<'a, T: 'a + fmt::Debug, B: Borrow<Buffer> + 'a> Deref for BufferBacked<'a, T, B> {
144 type Target = T;
145
146 fn deref(&self) -> &T {
147 &self.value
148 }
149}
150
151impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow<Buffer> + 'a> fmt::Display
152 for BufferBacked<'a, T, B>
153{
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 fmt::Display::fmt(&self.value, f)
156 }
157}
158
159/// A VCF/BCF record.
160/// New records can be created by the `empty_record` methods of [`bcf::Reader`](crate::bcf::Reader)
161/// and [`bcf::Writer`](crate::bcf::Writer).
162/// # Example
163/// ```rust
164/// use rust_htslib::bcf::{Format, Writer};
165/// use rust_htslib::bcf::header::Header;
166///
167/// // Create minimal VCF header with a single sample
168/// let mut header = Header::new();
169/// header.push_sample("sample".as_bytes());
170///
171/// // Write uncompressed VCF to stdout with above header and get an empty record
172/// let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
173/// let mut record = vcf.empty_record();
174/// ```
#[derive(Debug)]
pub struct Record {
    // Raw pointer to the underlying htslib record (`bcf1_t`).
    pub inner: *mut htslib::bcf1_t,
    // Reference-counted header the record is interpreted against.
    header: Rc<HeaderView>,
}
180
181impl Record {
/// Construct record with reference to header `HeaderView`, for crate-internal use.
183 pub(crate) fn new(header: Rc<HeaderView>) -> Self {
184 let inner = unsafe {
185 let inner = htslib::bcf_init();
186 // Always unpack record.
187 htslib::bcf_unpack(inner, htslib::BCF_UN_ALL as i32);
188 inner
189 };
190 Record { inner, header }
191 }
192
193 /// Force unpacking of internal record values.
194 pub fn unpack(&mut self) {
195 unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
196 }
197
198 /// Return associated header.
199 pub fn header(&self) -> &HeaderView {
200 self.header.as_ref()
201 }
202
203 /// Set the record header.
204 pub(crate) fn set_header(&mut self, header: Rc<HeaderView>) {
205 self.header = header;
206 }
207
208 /// Return reference to the inner C struct.
209 ///
210 /// # Remarks
211 ///
212 /// Note that this function is only required as long as Rust-Htslib does not provide full
213 /// access to all aspects of Htslib.
214 pub fn inner(&self) -> &htslib::bcf1_t {
215 unsafe { &*self.inner }
216 }
217
218 /// Return mutable reference to inner C struct.
219 ///
220 /// # Remarks
221 ///
222 /// Note that this function is only required as long as Rust-Htslib does not provide full
223 /// access to all aspects of Htslib.
224 pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
225 unsafe { &mut *self.inner }
226 }
227
228 /// Get the reference id of the record.
229 ///
230 /// To look up the contig name,
231 /// use [`HeaderView::rid2name`](../header/struct.HeaderView.html#method.rid2name).
232 ///
233 /// # Returns
234 ///
235 /// - `Some(rid)` if the internal `rid` is set to a value that is not `-1`
236 /// - `None` if the internal `rid` is set to `-1`
237 pub fn rid(&self) -> Option<u32> {
238 match self.inner().rid {
239 -1 => None,
240 rid => Some(rid as u32),
241 }
242 }
243
244 /// Update the reference id of the record.
245 ///
246 /// To look up reference id for a contig name,
247 /// use [`HeaderView::name2rid`](../header/struct.HeaderView.html#method.name2rid).
248 ///
249 /// # Example
250 ///
251 /// Example assumes we have a Record `record` from a VCF with a header containing region
252 /// named `1`. See [module documentation](../index.html#example-writing) for how to set
253 /// up VCF, header, and record.
254 ///
255 /// ```
256 /// # use rust_htslib::bcf::{Format, Writer};
257 /// # use rust_htslib::bcf::header::Header;
258 /// # let mut header = Header::new();
259 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
260 /// # header.push_record(header_contig_line.as_bytes());
261 /// # header.push_sample("test_sample".as_bytes());
262 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
263 /// # let mut record = vcf.empty_record();
264 /// let rid = record.header().name2rid(b"1").ok();
265 /// record.set_rid(rid);
266 /// assert_eq!(record.rid(), rid);
267 /// let name = record.header().rid2name(record.rid().unwrap()).ok();
268 /// assert_eq!(Some("1".as_bytes()), name);
269 /// ```
270 pub fn set_rid(&mut self, rid: Option<u32>) {
271 match rid {
272 Some(rid) => self.inner_mut().rid = rid as i32,
273 None => self.inner_mut().rid = -1,
274 }
275 }
276
277 /// Return **0-based** position
278 pub fn pos(&self) -> i64 {
279 self.inner().pos
280 }
281
282 /// Set **0-based** position
283 pub fn set_pos(&mut self, pos: i64) {
284 self.inner_mut().pos = pos;
285 }
286
287 /// Return the **0-based, exclusive** end position
288 ///
289 /// # Example
290 /// ```rust
291 /// # use rust_htslib::bcf::{Format, Header, Writer};
292 /// # use tempfile::NamedTempFile;
293 /// # let tmp = NamedTempFile::new().unwrap();
294 /// # let path = tmp.path();
295 /// # let header = Header::new();
296 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
297 /// # let mut record = vcf.empty_record();
298 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
299 /// record.set_alleles(alleles).expect("Failed to set alleles");
300 /// record.set_pos(5);
301 ///
302 /// assert_eq!(record.end(), 8)
303 /// ```
304 pub fn end(&self) -> i64 {
305 self.pos() + self.rlen()
306 }
307
308 /// Return the value of the ID column.
309 ///
310 /// When empty, returns `b".".to_vec()`.
311 pub fn id(&self) -> Vec<u8> {
312 if self.inner().d.id.is_null() {
313 b".".to_vec()
314 } else {
315 let id = unsafe { ffi::CStr::from_ptr(self.inner().d.id) };
316 id.to_bytes().to_vec()
317 }
318 }
319
320 /// Update the ID string to the given value.
321 pub fn set_id(&mut self, id: &[u8]) -> Result<()> {
322 let c_str = ffi::CString::new(id).unwrap();
323 if unsafe {
324 htslib::bcf_update_id(
325 self.header().inner,
326 self.inner,
327 c_str.as_ptr() as *mut c_char,
328 )
329 } == 0
330 {
331 Ok(())
332 } else {
333 Err(Error::BcfSetValues)
334 }
335 }
336
337 /// Clear the ID column (set it to `"."`).
338 pub fn clear_id(&mut self) -> Result<()> {
339 let c_str = ffi::CString::new(&b"."[..]).unwrap();
340 if unsafe {
341 htslib::bcf_update_id(
342 self.header().inner,
343 self.inner,
344 c_str.as_ptr() as *mut c_char,
345 )
346 } == 0
347 {
348 Ok(())
349 } else {
350 Err(Error::BcfSetValues)
351 }
352 }
353
354 /// Add the ID string (the ID field is semicolon-separated), checking for duplicates.
355 pub fn push_id(&mut self, id: &[u8]) -> Result<()> {
356 let c_str = ffi::CString::new(id).unwrap();
357 if unsafe {
358 htslib::bcf_add_id(
359 self.header().inner,
360 self.inner,
361 c_str.as_ptr() as *mut c_char,
362 )
363 } == 0
364 {
365 Ok(())
366 } else {
367 Err(Error::BcfSetValues)
368 }
369 }
370
371 /// Return `Filters` iterator for enumerating all filters that have been set.
372 ///
/// A record having the `PASS` filter will return an empty `Filters` here.
374 pub fn filters(&self) -> Filters<'_> {
375 Filters::new(self)
376 }
377
378 /// Query whether the filter with the given ID has been set.
379 ///
380 /// This method can be used to check if a record passes filtering by using either `Id(0)`,
381 /// `PASS` or `.`
382 ///
383 /// # Example
384 /// ```rust
385 /// # use rust_htslib::bcf::{Format, Header, Writer};
386 /// # use rust_htslib::bcf::header::Id;
387 /// # use tempfile::NamedTempFile;
388 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
389 /// # let path = tmp.path();
390 /// let mut header = Header::new();
391 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
392 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
393 /// # let mut record = vcf.empty_record();
394 /// assert!(record.has_filter("PASS".as_bytes()));
395 /// assert!(record.has_filter(".".as_bytes()));
396 /// assert!(record.has_filter(&Id(0)));
397 ///
398 /// record.push_filter("foo".as_bytes()).unwrap();
399 /// assert!(record.has_filter("foo".as_bytes()));
400 /// assert!(!record.has_filter("PASS".as_bytes()))
401 /// ```
402 pub fn has_filter<T: FilterId + ?Sized>(&self, flt_id: &T) -> bool {
403 if flt_id.is_pass() && self.inner().d.n_flt == 0 {
404 return true;
405 }
406 let id = match flt_id.id_from_header(self.header()) {
407 Ok(i) => *i,
408 Err(_) => return false,
409 };
410 for i in 0..(self.inner().d.n_flt as isize) {
411 if unsafe { *self.inner().d.flt.offset(i) } == id as i32 {
412 return true;
413 }
414 }
415 false
416 }
417
418 /// Set the given filter IDs to the FILTER column.
419 ///
420 /// Setting an empty slice removes all filters and sets `PASS`.
421 ///
422 /// # Example
423 /// ```rust
424 /// # use rust_htslib::bcf::{Format, Header, Writer};
425 /// # use rust_htslib::bcf::header::Id;
426 /// # use tempfile::NamedTempFile;
427 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
428 /// # let path = tmp.path();
429 /// let mut header = Header::new();
430 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
431 /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
432 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
433 /// # let mut record = vcf.empty_record();
434 /// let foo = record.header().name_to_id(b"foo").unwrap();
435 /// let bar = record.header().name_to_id(b"bar").unwrap();
436 /// assert!(record.has_filter("PASS".as_bytes()));
437 /// let mut filters = vec![&foo, &bar];
438 /// record.set_filters(&filters).unwrap();
439 /// assert!(record.has_filter(&foo));
440 /// assert!(record.has_filter(&bar));
441 /// assert!(!record.has_filter("PASS".as_bytes()));
442 /// filters.clear();
443 /// record.set_filters(&filters).unwrap();
444 /// assert!(record.has_filter("PASS".as_bytes()));
445 /// assert!(!record.has_filter("foo".as_bytes()));
446 /// // 'baz' isn't in the header
447 /// assert!(record.set_filters(&["baz".as_bytes()]).is_err())
448 /// ```
449 ///
450 /// # Errors
451 /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
452 ///
453 pub fn set_filters<T: FilterId + ?Sized>(&mut self, flt_ids: &[&T]) -> Result<()> {
454 let mut ids: Vec<i32> = flt_ids
455 .iter()
456 .map(|id| id.id_from_header(self.header()).map(|id| *id as i32))
457 .collect::<Result<Vec<i32>>>()?;
458 unsafe {
459 htslib::bcf_update_filter(
460 self.header().inner,
461 self.inner,
462 ids.as_mut_ptr(),
463 ids.len() as i32,
464 );
465 };
466 Ok(())
467 }
468
469 /// Add the given filter to the FILTER column.
470 ///
471 /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
472 /// any existing `PASS` filter is removed.
473 ///
474 /// # Example
475 /// ```rust
476 /// # use rust_htslib::bcf::{Format, Header, Writer};
477 /// # use tempfile::NamedTempFile;
478 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
479 /// # let path = tmp.path();
480 /// let mut header = Header::new();
481 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
482 /// header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
483 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
484 /// # let mut record = vcf.empty_record();
485 /// let foo = "foo".as_bytes();
486 /// let bar = record.header().name_to_id(b"bar").unwrap();
487 /// assert!(record.has_filter("PASS".as_bytes()));
488 ///
489 /// record.push_filter(foo).unwrap();
490 /// record.push_filter(&bar).unwrap();
491 /// assert!(record.has_filter(foo));
492 /// assert!(record.has_filter(&bar));
493 /// // filter must exist in the header
494 /// assert!(record.push_filter("baz".as_bytes()).is_err())
495 /// ```
496 ///
497 /// # Errors
498 /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
499 ///
500 pub fn push_filter<T: FilterId + ?Sized>(&mut self, flt_id: &T) -> Result<()> {
501 let id = flt_id.id_from_header(self.header())?;
502 unsafe {
503 htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
504 };
505 Ok(())
506 }
507
508 /// Remove the given filter from the FILTER column.
509 ///
510 /// # Arguments
511 ///
512 /// - `flt_id` - The corresponding filter ID to remove.
513 /// - `pass_on_empty` - Set to `PASS` when removing the last filter.
514 ///
515 /// # Example
516 /// ```rust
517 /// # use rust_htslib::bcf::{Format, Header, Writer};
518 /// # use tempfile::NamedTempFile;
519 /// # let tmp = tempfile::NamedTempFile::new().unwrap();
520 /// # let path = tmp.path();
521 /// let mut header = Header::new();
522 /// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
523 /// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
524 /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
525 /// # let mut record = vcf.empty_record();
526 /// let foo = "foo".as_bytes();
527 /// let bar = "bar".as_bytes();
528 /// record.set_filters(&[foo, bar]).unwrap();
529 /// assert!(record.has_filter(foo));
530 /// assert!(record.has_filter(bar));
531 ///
532 /// record.remove_filter(foo, true).unwrap();
533 /// assert!(!record.has_filter(foo));
534 /// assert!(record.has_filter(bar));
535 /// // 'baz' is not in the header
536 /// assert!(record.remove_filter("baz".as_bytes(), true).is_err());
537 ///
538 /// record.remove_filter(bar, true).unwrap();
539 /// assert!(!record.has_filter(bar));
540 /// assert!(record.has_filter("PASS".as_bytes()));
541 /// ```
542 ///
543 /// # Errors
544 /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
545 ///
546 pub fn remove_filter<T: FilterId + ?Sized>(
547 &mut self,
548 flt_id: &T,
549 pass_on_empty: bool,
550 ) -> Result<()> {
551 let id = flt_id.id_from_header(self.header())?;
552 unsafe {
553 htslib::bcf_remove_filter(
554 self.header().inner,
555 self.inner,
556 *id as i32,
557 pass_on_empty as i32,
558 )
559 };
560 Ok(())
561 }
562
563 /// Get alleles strings.
564 ///
565 /// The first allele is the reference allele.
566 pub fn alleles(&self) -> Vec<&[u8]> {
567 unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
568 let n = self.inner().n_allele() as usize;
569 let dec = self.inner().d;
570 let alleles = unsafe { slice::from_raw_parts(dec.allele, n) };
571 (0..n)
572 .map(|i| unsafe { ffi::CStr::from_ptr(alleles[i]).to_bytes() })
573 .collect()
574 }
575
576 /// Set alleles. The first allele is the reference allele.
577 ///
578 /// # Example
579 /// ```rust
580 /// # use rust_htslib::bcf::{Format, Writer};
581 /// # use rust_htslib::bcf::header::Header;
582 /// #
583 /// # // Create minimal VCF header with a single sample
584 /// # let mut header = Header::new();
585 /// # header.push_sample("sample".as_bytes());
586 /// #
587 /// # // Write uncompressed VCF to stdout with above header and get an empty record
588 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
589 /// # let mut record = vcf.empty_record();
590 /// assert_eq!(record.allele_count(), 0);
591 ///
592 /// let alleles: &[&[u8]] = &[b"A", b"TG"];
593 /// record.set_alleles(alleles).expect("Failed to set alleles");
594 /// assert_eq!(record.allele_count(), 2)
595 /// ```
596 pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<()> {
597 let cstrings: Vec<ffi::CString> = alleles
598 .iter()
599 .map(|vec| ffi::CString::new(*vec).unwrap())
600 .collect();
601 let mut ptrs: Vec<*const c_char> = cstrings
602 .iter()
603 .map(|cstr| cstr.as_ptr() as *const c_char)
604 .collect();
605 if unsafe {
606 htslib::bcf_update_alleles(
607 self.header().inner,
608 self.inner,
609 ptrs.as_mut_ptr(),
610 alleles.len() as i32,
611 )
612 } == 0
613 {
614 Ok(())
615 } else {
616 Err(Error::BcfSetValues)
617 }
618 }
619
620 /// Get variant quality.
621 pub fn qual(&self) -> f32 {
622 self.inner().qual
623 }
624
625 /// Set variant quality.
626 pub fn set_qual(&mut self, qual: f32) {
627 self.inner_mut().qual = qual;
628 }
629
630 pub fn info<'a>(&'a self, tag: &'a [u8]) -> Info<'a, Buffer> {
631 self.info_shared_buffer(tag, Buffer::new())
632 }
633
634 /// Get the value of the given info tag.
635 pub fn info_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
636 &'a self,
637 tag: &'a [u8],
638 buffer: B,
639 ) -> Info<'a, B> {
640 Info {
641 record: self,
642 tag,
643 buffer,
644 }
645 }
646
647 /// Get the number of samples in the record.
648 pub fn sample_count(&self) -> u32 {
649 self.inner().n_sample()
650 }
651
652 /// Get the number of alleles, including reference allele.
653 pub fn allele_count(&self) -> u32 {
654 self.inner().n_allele()
655 }
656
657 /// Add/replace genotypes in FORMAT GT tag.
658 ///
659 /// # Arguments
660 ///
661 /// - `genotypes` - a flattened, two-dimensional array of GenotypeAllele,
662 /// the first dimension contains one array for each sample.
663 ///
664 /// # Errors
665 ///
666 /// Returns error if GT tag is not present in header.
667 ///
668 /// # Example
669 ///
670 /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag.
671 /// See [module documentation](../index.html#example-writing) for how to set up
672 /// VCF, header, and record.
673 ///
674 /// ```
675 /// # use rust_htslib::bcf::{Format, Writer};
676 /// # use rust_htslib::bcf::header::Header;
677 /// # use rust_htslib::bcf::record::GenotypeAllele;
678 /// # let mut header = Header::new();
679 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
680 /// # header.push_record(header_contig_line.as_bytes());
681 /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
682 /// # header.push_record(header_gt_line.as_bytes());
683 /// # header.push_sample("test_sample".as_bytes());
684 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
685 /// # let mut record = vcf.empty_record();
686 /// let alleles = &[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)];
687 /// record.push_genotypes(alleles);
688 /// assert_eq!("1/1", &format!("{}", record.genotypes().unwrap().get(0)));
689 /// ```
690 pub fn push_genotypes(&mut self, genotypes: &[GenotypeAllele]) -> Result<()> {
691 let encoded: Vec<i32> = genotypes.iter().map(|gt| i32::from(*gt)).collect();
692 self.push_format_integer(b"GT", &encoded)
693 }
694
695 /// Add/replace genotypes in FORMAT GT tag by providing a list of genotypes.
696 ///
697 /// # Arguments
698 ///
699 /// - `genotypes` - a two-dimensional array of GenotypeAllele
700 /// - `max_ploidy` - the maximum number of alleles allowed for any genotype on any sample.
701 ///
702 /// # Errors
703 ///
/// Returns an error if any genotype has more alleles than `max_ploidy` or if the GT tag is not present in the header.
705 ///
706 /// # Example
707 ///
708 /// Example assumes we have a Record `record` from a VCF with a `GT` `FORMAT` tag and three samples.
709 /// See [module documentation](../index.html#example-writing) for how to set up
710 /// VCF, header, and record.
711 ///
712 /// ```
713 /// # use rust_htslib::bcf::{Format, Writer};
714 /// # use rust_htslib::bcf::header::Header;
715 /// # use rust_htslib::bcf::record::GenotypeAllele;
716 /// # use std::iter;
717 /// # let mut header = Header::new();
718 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
719 /// # header.push_record(header_contig_line.as_bytes());
720 /// # let header_gt_line = r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#;
721 /// # header.push_record(header_gt_line.as_bytes());
722 /// # header.push_sample("first_sample".as_bytes());
723 /// # header.push_sample("second_sample".as_bytes());
724 /// # header.push_sample("third_sample".as_bytes());
725 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf)?;
726 /// # let mut record = vcf.empty_record();
727 /// let alleles = vec![
728 /// vec![GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(1)],
729 /// vec![GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1)],
730 /// vec![GenotypeAllele::Unphased(0)],
731 /// ];
732 /// record.push_genotype_structured(&alleles, 2);
733 /// let gts = record.genotypes()?;
734 /// assert_eq!("1/1", &format!("{}", gts.get(0)));
735 /// assert_eq!("0|1", &format!("{}", gts.get(1)));
736 /// assert_eq!("0", &format!("{}", gts.get(2)));
737 /// # Ok::<(), rust_htslib::errors::Error>(())
738 /// ```
739 pub fn push_genotype_structured<GT>(
740 &mut self,
741 genotypes: &[GT],
742 max_ploidy: usize,
743 ) -> Result<()>
744 where
745 GT: AsRef<[GenotypeAllele]>,
746 {
747 let mut data = Vec::with_capacity(max_ploidy * genotypes.len());
748 for gt in genotypes {
749 if gt.as_ref().len() > max_ploidy {
750 return Err(Error::BcfSetValues);
751 }
752 data.extend(
753 gt.as_ref()
754 .iter()
755 .map(|gta| i32::from(*gta))
756 .chain(iter::repeat_n(
757 VECTOR_END_INTEGER,
758 max_ploidy - gt.as_ref().len(),
759 )),
760 );
761 }
762 self.push_format_integer(b"GT", &data)
763 }
764
765 /// Get genotypes as vector of one `Genotype` per sample.
766 ///
767 /// # Example
768 /// Parsing genotype field (`GT` tag) from a VCF record:
769 /// ```
770 /// use crate::rust_htslib::bcf::{Reader, Read};
771 /// let mut vcf = Reader::from_path(&"test/test_string.vcf").expect("Error opening file.");
772 /// let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
773 /// for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
774 /// let mut rec = rec.expect("Error reading record.");
775 /// let genotypes = rec.genotypes().expect("Error reading genotypes");
776 /// assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
777 /// }
778 /// ```
779 pub fn genotypes(&self) -> Result<Genotypes<'_, Buffer>> {
780 self.genotypes_shared_buffer(Buffer::new())
781 }
782
783 /// Get genotypes as vector of one `Genotype` per sample, using a given shared buffer
784 /// to avoid unnecessary allocations.
785 pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result<Genotypes<'a, B>>
786 where
787 B: BorrowMut<Buffer> + Borrow<Buffer> + 'a,
788 {
789 Ok(Genotypes {
790 encoded: self.format_shared_buffer(b"GT", buffer).integer()?,
791 })
792 }
793
794 /// Retrieve data for a `FORMAT` field
795 ///
796 /// # Example
797 /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
798 /// for an example of the setup used here.*
799 ///
800 /// ```rust
801 /// # use rust_htslib::bcf::{Format, Writer};
802 /// # use rust_htslib::bcf::header::Header;
803 /// #
804 /// # // Create minimal VCF header with a single sample
805 /// # let mut header = Header::new();
806 /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
807 /// #
808 /// # // Write uncompressed VCF to stdout with above header and get an empty record
809 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
810 /// # let mut record = vcf.empty_record();
811 /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
812 ///
813 /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
814 /// let sample1_depth = read_depths[0];
815 /// assert_eq!(sample1_depth, &[20]);
816 /// let sample2_depth = read_depths[1];
817 /// assert_eq!(sample2_depth, &[12])
818 /// ```
819 ///
820 /// # Errors
821 /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
822 /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
/// accessed. If parts of the data are accessed after the `BufferBacked` object has been
824 /// dropped, you will access unallocated memory.
825 pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
826 self.format_shared_buffer(tag, Buffer::new())
827 }
828
829 /// Get the value of the given format tag for each sample.
830 pub fn format_shared_buffer<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b>(
831 &'a self,
832 tag: &'a [u8],
833 buffer: B,
834 ) -> Format<'a, B> {
835 Format::new(self, tag, buffer)
836 }
837
838 /// Add/replace an integer-typed FORMAT tag.
839 ///
840 /// # Arguments
841 ///
842 /// - `tag` - The tag's string.
843 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
844 /// for each sample.
845 ///
846 /// # Errors
847 ///
848 /// Returns error if tag is not present in header.
849 pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
850 self.push_format(tag, data, htslib::BCF_HT_INT)
851 }
852
853 /// Add/replace a float-typed FORMAT tag.
854 ///
855 /// # Arguments
856 ///
857 /// - `tag` - The tag's string.
858 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
859 /// for each sample.
860 ///
861 /// # Errors
862 ///
863 /// Returns error if tag is not present in header.
864 ///
865 /// # Example
866 ///
867 /// Example assumes we have a Record `record` from a VCF with an `AF` `FORMAT` tag.
868 /// See [module documentation](../index.html#example-writing) for how to set up
869 /// VCF, header, and record.
870 ///
871 /// ```
872 /// # use rust_htslib::bcf::{Format, Writer};
873 /// # use rust_htslib::bcf::header::Header;
874 /// # use rust_htslib::bcf::record::GenotypeAllele;
875 /// # let mut header = Header::new();
876 /// # let header_contig_line = r#"##contig=<ID=1,length=10>"#;
877 /// # header.push_record(header_contig_line.as_bytes());
878 /// # let header_af_line = r#"##FORMAT=<ID=AF,Number=1,Type=Float,Description="Frequency">"#;
879 /// # header.push_record(header_af_line.as_bytes());
880 /// # header.push_sample("test_sample".as_bytes());
881 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
882 /// # let mut record = vcf.empty_record();
883 /// record.push_format_float(b"AF", &[0.5]);
884 /// assert_eq!(0.5, record.format(b"AF").float().unwrap()[0][0]);
885 /// ```
886 pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
887 self.push_format(tag, data, htslib::BCF_HT_REAL)
888 }
889
890 /// Add/replace a single-char-typed FORMAT tag.
891 ///
892 /// # Arguments
893 ///
894 /// - `tag` - The tag's string.
895 /// - `data` - a flattened, two-dimensional array, the first dimension contains one array
896 /// for each sample.
897 ///
898 /// # Errors
899 ///
900 /// Returns error if tag is not present in header.
901 pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<()> {
902 self.push_format(tag, data, htslib::BCF_HT_STR)
903 }
904
905 /// Add a format tag. Data is a flattened two-dimensional array.
906 /// The first dimension contains one array for each sample.
907 fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
908 let tag_c_str = ffi::CString::new(tag).unwrap();
909 unsafe {
910 if htslib::bcf_update_format(
911 self.header().inner,
912 self.inner,
913 tag_c_str.as_ptr() as *mut c_char,
914 data.as_ptr() as *const ::std::os::raw::c_void,
915 data.len() as i32,
916 ht as i32,
917 ) == 0
918 {
919 Ok(())
920 } else {
921 Err(Error::BcfSetTag {
922 tag: str::from_utf8(tag).unwrap().to_owned(),
923 })
924 }
925 }
926 }
927
928 // TODO: should we add convenience methods clear_format_*?
929
930 /// Add a string-typed FORMAT tag. Note that genotypes are treated as a special case
931 /// and cannot be added with this method. See instead [push_genotypes](#method.push_genotypes).
932 ///
933 /// # Arguments
934 ///
935 /// - `tag` - The tag's string.
936 /// - `data` - a two-dimensional array, the first dimension contains one array
937 /// for each sample. Must be non-empty.
938 ///
939 /// # Errors
940 ///
941 /// Returns error if tag is not present in header.
942 pub fn push_format_string<D: Borrow<[u8]>>(&mut self, tag: &[u8], data: &[D]) -> Result<()> {
943 assert!(
944 !data.is_empty(),
945 "given string data must have at least 1 element"
946 );
947 let c_data = data
948 .iter()
949 .map(|s| ffi::CString::new(s.borrow()).unwrap())
950 .collect::<Vec<ffi::CString>>();
951 let c_ptrs = c_data
952 .iter()
953 .map(|s| s.as_ptr() as *mut i8)
954 .collect::<Vec<*mut i8>>();
955 let tag_c_str = ffi::CString::new(tag).unwrap();
956 unsafe {
957 if htslib::bcf_update_format_string(
958 self.header().inner,
959 self.inner,
960 tag_c_str.as_ptr() as *mut c_char,
961 c_ptrs.as_slice().as_ptr() as *mut *const c_char,
962 data.len() as i32,
963 ) == 0
964 {
965 Ok(())
966 } else {
967 Err(Error::BcfSetTag {
968 tag: str::from_utf8(tag).unwrap().to_owned(),
969 })
970 }
971 }
972 }
973
974 /// Add/replace an integer-typed INFO entry.
975 pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<()> {
976 self.push_info(tag, data, htslib::BCF_HT_INT)
977 }
978
979 /// Remove the integer-typed INFO entry.
980 pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<()> {
981 self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
982 }
983
984 /// Add/replace a float-typed INFO entry.
985 pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<()> {
986 self.push_info(tag, data, htslib::BCF_HT_REAL)
987 }
988
989 /// Remove the float-typed INFO entry.
990 pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<()> {
991 self.push_info::<u8>(tag, &[], htslib::BCF_HT_REAL)
992 }
993
994 /// Add/replace an INFO tag.
995 ///
996 /// # Arguments
997 /// * `tag` - the tag to add/replace
998 /// * `data` - the data to set
999 /// * `ht` - the HTSLib type to use
1000 fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<()> {
1001 let tag_c_str = ffi::CString::new(tag).unwrap();
1002 unsafe {
1003 if htslib::bcf_update_info(
1004 self.header().inner,
1005 self.inner,
1006 tag_c_str.as_ptr() as *mut c_char,
1007 data.as_ptr() as *const ::std::os::raw::c_void,
1008 data.len() as i32,
1009 ht as i32,
1010 ) == 0
1011 {
1012 Ok(())
1013 } else {
1014 Err(Error::BcfSetTag {
1015 tag: str::from_utf8(tag).unwrap().to_owned(),
1016 })
1017 }
1018 }
1019 }
1020
1021 /// Set flag into the INFO column.
1022 pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1023 self.push_info_string_impl(tag, &[b""], htslib::BCF_HT_FLAG)
1024 }
1025
1026 /// Remove the flag from the INFO column.
1027 pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<()> {
1028 self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
1029 }
1030
1031 /// Add/replace a string-typed INFO entry.
1032 pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<()> {
1033 self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
1034 }
1035
1036 /// Remove the string field from the INFO column.
1037 pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<()> {
1038 self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
1039 }
1040
1041 /// Add an string-valued INFO tag.
1042 fn push_info_string_impl(&mut self, tag: &[u8], data: &[&[u8]], ht: u32) -> Result<()> {
1043 let mut buf: Vec<u8> = Vec::new();
1044 for (i, &s) in data.iter().enumerate() {
1045 if i > 0 {
1046 buf.extend(b",");
1047 }
1048 buf.extend(s);
1049 }
1050 let c_str = ffi::CString::new(buf).unwrap();
1051 let len = if ht == htslib::BCF_HT_FLAG {
1052 data.len()
1053 } else {
1054 c_str.to_bytes().len()
1055 };
1056 let tag_c_str = ffi::CString::new(tag).unwrap();
1057 unsafe {
1058 if htslib::bcf_update_info(
1059 self.header().inner,
1060 self.inner,
1061 tag_c_str.as_ptr() as *mut c_char,
1062 c_str.as_ptr() as *const ::std::os::raw::c_void,
1063 len as i32,
1064 ht as i32,
1065 ) == 0
1066 {
1067 Ok(())
1068 } else {
1069 Err(Error::BcfSetTag {
1070 tag: str::from_utf8(tag).unwrap().to_owned(),
1071 })
1072 }
1073 }
1074 }
1075
1076 /// Remove unused alleles.
1077 pub fn trim_alleles(&mut self) -> Result<()> {
1078 match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
1079 -1 => Err(Error::BcfRemoveAlleles),
1080 _ => Ok(()),
1081 }
1082 }
1083
1084 pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> {
1085 let rm_set = unsafe { htslib::kbs_init(remove.len()) };
1086
1087 for (i, &r) in remove.iter().enumerate() {
1088 if r {
1089 unsafe {
1090 htslib::kbs_insert(rm_set, i as i32);
1091 }
1092 }
1093 }
1094
1095 let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
1096
1097 unsafe {
1098 htslib::kbs_destroy(rm_set);
1099 }
1100
1101 match ret {
1102 -1 => Err(Error::BcfRemoveAlleles),
1103 _ => Ok(()),
1104 }
1105 }
1106
1107 /// Get the length of the reference allele. If the record has no reference allele, then the
1108 /// result will be `0`.
1109 ///
1110 /// # Example
1111 /// ```rust
1112 /// # use rust_htslib::bcf::{Format, Writer};
1113 /// # use rust_htslib::bcf::header::Header;
1114 /// #
1115 /// # // Create minimal VCF header with a single sample
1116 /// # let mut header = Header::new();
1117 /// # header.push_sample("sample".as_bytes());
1118 /// #
1119 /// # // Write uncompressed VCF to stdout with above header and get an empty record
1120 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1121 /// # let mut record = vcf.empty_record();
1122 /// # assert_eq!(record.rlen(), 0);
1123 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1124 /// record.set_alleles(alleles).expect("Failed to set alleles");
1125 /// assert_eq!(record.rlen(), 3)
1126 /// ```
1127 pub fn rlen(&self) -> i64 {
1128 self.inner().rlen
1129 }
1130
1131 /// Clear all parts of the record. Useful if you plan to reuse a record object multiple times.
1132 ///
1133 /// # Example
1134 /// ```rust
1135 /// # use rust_htslib::bcf::{Format, Writer};
1136 /// # use rust_htslib::bcf::header::Header;
1137 /// #
1138 /// # // Create minimal VCF header with a single sample
1139 /// # let mut header = Header::new();
1140 /// # header.push_sample("sample".as_bytes());
1141 /// #
1142 /// # // Write uncompressed VCF to stdout with above header and get an empty record
1143 /// # let mut vcf = Writer::from_stdout(&header, true, Format::Vcf).unwrap();
1144 /// # let mut record = vcf.empty_record();
1145 /// let alleles: &[&[u8]] = &[b"AGG", b"TG"];
1146 /// record.set_alleles(alleles).expect("Failed to set alleles");
1147 /// record.set_pos(6);
1148 /// record.clear();
1149 /// assert_eq!(record.rlen(), 0);
1150 /// assert_eq!(record.pos(), 0)
1151 /// ```
1152 pub fn clear(&self) {
1153 unsafe { htslib::bcf_clear(self.inner) }
1154 }
1155
1156 /// Provide short description of record for locating it in the BCF/VCF file.
1157 pub fn desc(&self) -> String {
1158 if let Some(rid) = self.rid() {
1159 if let Ok(contig) = self.header.rid2name(rid) {
1160 return format!("{}:{}", str::from_utf8(contig).unwrap(), self.pos());
1161 }
1162 }
1163 "".to_owned()
1164 }
1165
1166 /// Convert to VCF String
1167 ///
1168 /// Intended for debug only. Use Writer for efficient VCF output.
1169 ///
1170 pub fn to_vcf_string(&self) -> Result<String> {
1171 let mut buf = htslib::kstring_t {
1172 l: 0,
1173 m: 0,
1174 s: ptr::null_mut(),
1175 };
1176 let ret = unsafe { htslib::vcf_format(self.header().inner, self.inner, &mut buf) };
1177
1178 if ret < 0 {
1179 if !buf.s.is_null() {
1180 unsafe {
1181 libc::free(buf.s as *mut libc::c_void);
1182 }
1183 }
1184 return Err(Error::BcfToString);
1185 }
1186
1187 let vcf_str = unsafe {
1188 let vcf_str = String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap());
1189 if !buf.s.is_null() {
1190 libc::free(buf.s as *mut libc::c_void);
1191 }
1192 vcf_str
1193 };
1194
1195 Ok(vcf_str)
1196 }
1197}
1198
1199impl Clone for Record {
1200 fn clone(&self) -> Self {
1201 let inner = unsafe { htslib::bcf_dup(self.inner) };
1202 Record {
1203 inner,
1204 header: self.header.clone(),
1205 }
1206 }
1207}
1208
1209impl genome::AbstractLocus for Record {
1210 fn contig(&self) -> &str {
1211 str::from_utf8(
1212 self.header()
1213 .rid2name(self.rid().expect("rid not set"))
1214 .expect("unable to find rid in header"),
1215 )
1216 .expect("unable to interpret contig name as UTF-8")
1217 }
1218
1219 fn pos(&self) -> u64 {
1220 self.pos() as u64
1221 }
1222}
1223
/// Phased or unphased alleles, represented as indices.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
    /// Unphased allele; the payload is the index into the list of alleles.
    Unphased(i32),
    /// Phased allele; the payload is the index into the list of alleles.
    Phased(i32),
    /// Unphased allele whose value is missing (displayed as `.`).
    UnphasedMissing,
    /// Phased allele whose value is missing (displayed as `.`).
    PhasedMissing,
}
1232
1233impl GenotypeAllele {
1234 /// Decode given integer according to BCF standard.
1235 #[deprecated(
1236 since = "0.36.0",
1237 note = "Please use the conversion trait From<i32> for GenotypeAllele instead."
1238 )]
1239 pub fn from_encoded(encoded: i32) -> Self {
1240 match (encoded, encoded & 1) {
1241 (0, 0) => GenotypeAllele::UnphasedMissing,
1242 (1, 1) => GenotypeAllele::PhasedMissing,
1243 (e, 1) => GenotypeAllele::Phased((e >> 1) - 1),
1244 (e, 0) => GenotypeAllele::Unphased((e >> 1) - 1),
1245 _ => panic!("unexpected phasing type"),
1246 }
1247 }
1248
1249 /// Get the index into the list of alleles.
1250 pub fn index(self) -> Option<u32> {
1251 match self {
1252 GenotypeAllele::Unphased(i) | GenotypeAllele::Phased(i) => Some(i as u32),
1253 GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
1254 }
1255 }
1256}
1257
1258impl fmt::Display for GenotypeAllele {
1259 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1260 match self.index() {
1261 Some(a) => write!(f, "{}", a),
1262 None => write!(f, "."),
1263 }
1264 }
1265}
1266
1267impl From<GenotypeAllele> for i32 {
1268 fn from(allele: GenotypeAllele) -> i32 {
1269 let (allele, phased) = match allele {
1270 GenotypeAllele::UnphasedMissing => (-1, 0),
1271 GenotypeAllele::PhasedMissing => (-1, 1),
1272 GenotypeAllele::Unphased(a) => (a, 0),
1273 GenotypeAllele::Phased(a) => (a, 1),
1274 };
1275 ((allele + 1) << 1) | phased
1276 }
1277}
1278
1279impl From<i32> for GenotypeAllele {
1280 fn from(encoded: i32) -> GenotypeAllele {
1281 match (encoded, encoded & 1) {
1282 (0, 0) => GenotypeAllele::UnphasedMissing,
1283 (1, 1) => GenotypeAllele::PhasedMissing,
1284 (e, 1) => GenotypeAllele::Phased((e >> 1) - 1),
1285 (e, 0) => GenotypeAllele::Unphased((e >> 1) - 1),
1286 _ => panic!("unexpected phasing type"),
1287 }
1288 }
1289}
1290
custom_derive! {
    /// Genotype representation as a vector of `GenotypeAllele`, one entry per allele.
    #[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Genotype(Vec<GenotypeAllele>);
}
1296
1297impl fmt::Display for Genotype {
1298 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1299 let Genotype(alleles) = self;
1300 write!(f, "{}", alleles[0])?;
1301 for a in &alleles[1..] {
1302 let sep = match a {
1303 GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
1304 GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
1305 };
1306 write!(f, "{}{}", sep, a)?;
1307 }
1308 Ok(())
1309 }
1310}
1311
/// Lazy representation of genotypes, that does no computation until a particular genotype is queried.
///
/// Decoding into `Genotype` values happens per sample in `get`.
#[derive(Debug)]
pub struct Genotypes<'a, B>
where
    B: Borrow<Buffer> + 'a,
{
    // Encoded allele values, one slice per sample, kept together with the buffer
    // they point into.
    encoded: BufferBacked<'a, Vec<&'a [i32]>, B>,
}
1320
1321impl<'a, B: Borrow<Buffer> + 'a> Genotypes<'a, B> {
1322 /// Get genotype of ith sample.
1323 ///
1324 /// Note that the result complies with the BCF spec. This means that the
1325 /// first allele will always be marked as `Unphased`. That is, if you have 1|1 in the VCF,
1326 /// this method will return `[Unphased(1), Phased(1)]`.
1327 pub fn get(&self, i: usize) -> Genotype {
1328 let igt = self.encoded[i];
1329 let allelles = igt
1330 .iter()
1331 .take_while(|&&i| i != VECTOR_END_INTEGER)
1332 .map(|&i| GenotypeAllele::from(i))
1333 .collect();
1334 Genotype(allelles)
1335 }
1336}
1337
1338impl Drop for Record {
1339 fn drop(&mut self) {
1340 unsafe { htslib::bcf_destroy(self.inner) };
1341 }
1342}
1343
// NOTE(review): `Send` is asserted manually, presumably because `Record` holds a raw
// htslib pointer — confirm that moving a record between threads is sound.
unsafe impl Send for Record {}

// NOTE(review): `Sync` is asserted manually; `Record::clear` modifies the record
// through `&self`, so confirm that shared access across threads is actually sound.
unsafe impl Sync for Record {}
1347
/// Info tag representation.
///
/// Bundles a record, the tag to read and the buffer that htslib fills when the
/// values are requested through `integer`, `float`, `flag` or `string`.
#[derive(Debug)]
pub struct Info<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    // Record the tag is read from.
    record: &'a Record,
    // Name of the INFO tag.
    tag: &'a [u8],
    // Buffer handed to `bcf_get_info_values`; its `inner` pointer and `len` are updated
    // on every read.
    buffer: B,
}
1355
/// Optional, buffer-backed list of byte strings (the success type of `Info::string`).
pub type BufferBackedOption<'b, B> = Option<BufferBacked<'b, Vec<&'b [u8]>, B>>;
1357
1358impl<'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Info<'_, B> {
1359 /// Short description of info tag.
1360 pub fn desc(&self) -> String {
1361 str::from_utf8(self.tag).unwrap().to_owned()
1362 }
1363
1364 fn data(&mut self, data_type: u32) -> Result<Option<i32>> {
1365 let mut n: i32 = self.buffer.borrow().len;
1366 let c_str = ffi::CString::new(self.tag).unwrap();
1367 let ret = unsafe {
1368 htslib::bcf_get_info_values(
1369 self.record.header().inner,
1370 self.record.inner,
1371 c_str.as_ptr() as *mut c_char,
1372 &mut self.buffer.borrow_mut().inner,
1373 &mut n,
1374 data_type as i32,
1375 )
1376 };
1377 self.buffer.borrow_mut().len = n;
1378
1379 match ret {
1380 -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1381 -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1382 -3 => Ok(None),
1383 ret => Ok(Some(ret)),
1384 }
1385 }
1386
1387 /// Get integers from tag. `None` if tag not present in record.
1388 ///
1389 /// Import `bcf::record::Numeric` for missing value handling.
1390 ///
1391 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1392 /// as along as the data is accessed. If parts of the data are accessed while
1393 /// the BufferBacked object is already dropped, you will access unallocated
1394 /// memory.
1395 pub fn integer(mut self) -> Result<Option<BufferBacked<'b, &'b [i32], B>>> {
1396 self.data(htslib::BCF_HT_INT).map(|data| {
1397 data.map(|ret| {
1398 let values = unsafe {
1399 slice::from_raw_parts(self.buffer.borrow().inner as *const i32, ret as usize)
1400 };
1401 BufferBacked::new(&values[..ret as usize], self.buffer)
1402 })
1403 })
1404 }
1405
1406 /// Get floats from tag. `None` if tag not present in record.
1407 ///
1408 /// Import `bcf::record::Numeric` for missing value handling.
1409 ///
1410 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1411 /// as along as the data is accessed. If parts of the data are accessed while
1412 /// the BufferBacked object is already dropped, you will access unallocated
1413 /// memory.
1414 pub fn float(mut self) -> Result<Option<BufferBacked<'b, &'b [f32], B>>> {
1415 self.data(htslib::BCF_HT_REAL).map(|data| {
1416 data.map(|ret| {
1417 let values = unsafe {
1418 slice::from_raw_parts(self.buffer.borrow().inner as *const f32, ret as usize)
1419 };
1420 BufferBacked::new(&values[..ret as usize], self.buffer)
1421 })
1422 })
1423 }
1424
1425 /// Get flags from tag. `false` if not set.
1426 pub fn flag(&mut self) -> Result<bool> {
1427 self.data(htslib::BCF_HT_FLAG).map(|data| match data {
1428 Some(ret) => ret == 1,
1429 None => false,
1430 })
1431 }
1432
1433 /// Get strings from tag. `None` if tag not present in record.
1434 ///
1435 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1436 /// as along as the data is accessed. If parts of the data are accessed while
1437 /// the BufferBacked object is already dropped, you will access unallocated
1438 /// memory.
1439 pub fn string(mut self) -> Result<BufferBackedOption<'b, B>> {
1440 self.data(htslib::BCF_HT_STR).map(|data| {
1441 data.map(|ret| {
1442 BufferBacked::new(
1443 unsafe {
1444 slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize)
1445 }
1446 .split(|c| *c == b',')
1447 .map(|s| {
1448 // stop at zero character
1449 s.split(|c| *c == 0u8)
1450 .next()
1451 .expect("Bug: returned string should not be empty.")
1452 })
1453 .collect(),
1454 self.buffer,
1455 )
1456 })
1457 })
1458 }
1459}
1460
// NOTE(review): `Send` is asserted manually for `Info` regardless of `B` — confirm
// that this is sound for every buffer type used.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Info<'_, B> {}

// NOTE(review): `Sync` is asserted manually for `Info` regardless of `B` — confirm
// that this is sound for every buffer type used.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Info<'_, B> {}
1464
1465fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
1466 s.split(|v| v.is_vector_end())
1467 .next()
1468 .expect("Bug: returned slice should not be empty.")
1469}
1470
/// Representation of per-sample data.
///
/// Bundles a record, the FORMAT tag to read, the htslib format descriptor looked
/// up for that tag and the buffer that htslib fills when values are requested.
#[derive(Debug)]
pub struct Format<'a, B: BorrowMut<Buffer> + Borrow<Buffer>> {
    // Record the tag is read from.
    record: &'a Record,
    // Name of the FORMAT tag.
    tag: &'a [u8],
    // Pointer returned by `bcf_get_fmt` for this tag (set in `Format::new`).
    inner: *mut htslib::bcf_fmt_t,
    // Buffer handed to `bcf_get_format_values`.
    buffer: B,
}
1479
1480impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
1481 /// Create new format data in a given record.
1482 fn new(record: &'a Record, tag: &'a [u8], buffer: B) -> Format<'a, B> {
1483 let c_str = ffi::CString::new(tag).unwrap();
1484 let inner = unsafe {
1485 htslib::bcf_get_fmt(
1486 record.header().inner,
1487 record.inner,
1488 c_str.as_ptr() as *mut c_char,
1489 )
1490 };
1491 Format {
1492 record,
1493 tag,
1494 inner,
1495 buffer,
1496 }
1497 }
1498
1499 /// Provide short description of format entry (just the tag name).
1500 pub fn desc(&self) -> String {
1501 str::from_utf8(self.tag).unwrap().to_owned()
1502 }
1503
1504 pub fn inner(&self) -> &htslib::bcf_fmt_t {
1505 unsafe { &*self.inner }
1506 }
1507
1508 pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
1509 unsafe { &mut *self.inner }
1510 }
1511
1512 fn values_per_sample(&self) -> usize {
1513 self.inner().n as usize
1514 }
1515
1516 /// Read and decode format data into a given type.
1517 fn data(&mut self, data_type: u32) -> Result<i32> {
1518 let mut n: i32 = self.buffer.borrow().len;
1519 let c_str = ffi::CString::new(self.tag).unwrap();
1520 let ret = unsafe {
1521 htslib::bcf_get_format_values(
1522 self.record.header().inner,
1523 self.record.inner,
1524 c_str.as_ptr() as *mut c_char,
1525 &mut self.buffer.borrow_mut().inner,
1526 &mut n,
1527 data_type as i32,
1528 )
1529 };
1530 self.buffer.borrow_mut().len = n;
1531 match ret {
1532 -1 => Err(Error::BcfUndefinedTag { tag: self.desc() }),
1533 -2 => Err(Error::BcfUnexpectedType { tag: self.desc() }),
1534 -3 => Err(Error::BcfMissingTag {
1535 tag: self.desc(),
1536 record: self.record.desc(),
1537 }),
1538 ret => Ok(ret),
1539 }
1540 }
1541
1542 /// Get format data as integers.
1543 ///
1544 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1545 /// as long as the data is accessed. If parts of the data are accessed while
1546 /// the BufferBacked object is already dropped, you will access unallocated
1547 /// memory.
1548 pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
1549 self.data(htslib::BCF_HT_INT).map(|ret| {
1550 BufferBacked::new(
1551 unsafe {
1552 slice::from_raw_parts(
1553 self.buffer.borrow_mut().inner as *const i32,
1554 ret as usize,
1555 )
1556 }
1557 .chunks(self.values_per_sample())
1558 .map(trim_slice)
1559 .collect(),
1560 self.buffer,
1561 )
1562 })
1563 }
1564
1565 /// Get format data as floats.
1566 ///
1567 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1568 /// as along as the data is accessed. If parts of the data are accessed while
1569 /// the BufferBacked object is already dropped, you will access unallocated
1570 /// memory.
1571 pub fn float(mut self) -> Result<BufferBacked<'b, Vec<&'b [f32]>, B>> {
1572 self.data(htslib::BCF_HT_REAL).map(|ret| {
1573 BufferBacked::new(
1574 unsafe {
1575 slice::from_raw_parts(
1576 self.buffer.borrow_mut().inner as *const f32,
1577 ret as usize,
1578 )
1579 }
1580 .chunks(self.values_per_sample())
1581 .map(trim_slice)
1582 .collect(),
1583 self.buffer,
1584 )
1585 })
1586 }
1587
1588 /// Get format data as byte slices. To obtain the values strings, use `std::str::from_utf8`.
1589 ///
1590 /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
1591 /// as along as the data is accessed. If parts of the data are accessed while
1592 /// the BufferBacked object is already dropped, you will access unallocated
1593 /// memory.
1594 pub fn string(mut self) -> Result<BufferBacked<'b, Vec<&'b [u8]>, B>> {
1595 self.data(htslib::BCF_HT_STR).map(|ret| {
1596 if ret == 0 {
1597 return BufferBacked::new(Vec::new(), self.buffer);
1598 }
1599 BufferBacked::new(
1600 unsafe {
1601 slice::from_raw_parts(self.buffer.borrow_mut().inner as *const u8, ret as usize)
1602 }
1603 .chunks(self.values_per_sample())
1604 .map(|s| {
1605 // stop at zero character
1606 s.split(|c| *c == 0u8)
1607 .next()
1608 .expect("Bug: returned string should not be empty.")
1609 })
1610 .collect(),
1611 self.buffer,
1612 )
1613 })
1614 }
1615}
1616
// NOTE(review): `Send` is asserted manually because `Format` stores a raw
// `*mut bcf_fmt_t` — confirm that moving it between threads is sound.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Send for Format<'_, B> {}

// NOTE(review): `Sync` is asserted manually because `Format` stores a raw
// `*mut bcf_fmt_t` — confirm that sharing it between threads is sound.
unsafe impl<B: BorrowMut<Buffer> + Borrow<Buffer>> Sync for Format<'_, B> {}
1620
/// Iterator over the filter ids stored in a record.
#[derive(Debug)]
pub struct Filters<'a> {
    /// Reference to the `Record` to enumerate filters for.
    record: &'a Record,
    /// Index of the next filter to return, if not at end.
    idx: i32,
}
1628
1629impl<'a> Filters<'a> {
1630 pub fn new(record: &'a Record) -> Self {
1631 Filters { record, idx: 0 }
1632 }
1633}
1634
1635impl Iterator for Filters<'_> {
1636 type Item = Id;
1637
1638 fn next(&mut self) -> Option<Id> {
1639 if self.record.inner().d.n_flt <= self.idx {
1640 None
1641 } else {
1642 let i = self.idx as isize;
1643 self.idx += 1;
1644 Some(Id(unsafe { *self.record.inner().d.flt.offset(i) } as u32))
1645 }
1646 }
1647}
1648
// Unit tests for the special float constants and for basic `Record` operations.
// The record tests build a `Writer` on a temporary file and work on an empty record.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bcf::{Format, Header, Writer};
    use tempfile::NamedTempFile;

    // The missing-value float must have exactly this bit pattern.
    #[test]
    fn test_missing_float() {
        let expected: u32 = 0x7F80_0001;
        assert_eq!(MISSING_FLOAT.bits(), expected);
    }

    // The vector-end float must have exactly this bit pattern.
    #[test]
    fn test_vector_end_float() {
        let expected: u32 = 0x7F80_0002;
        assert_eq!(VECTOR_END_FLOAT.bits(), expected);
    }

    // `rlen` is 0 for an empty record and the reference length once alleles are set.
    #[test]
    fn test_record_rlen() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert_eq!(record.rlen(), 0);
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        assert_eq!(record.rlen(), 3)
    }

    // `end` is the position plus the reference allele length.
    #[test]
    fn test_record_end() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(5);

        assert_eq!(record.end(), 8)
    }

    // `clear` resets reference length, sample count and position.
    #[test]
    fn test_record_clear() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_sample("sample".as_bytes());
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(6);
        record.clear();

        assert_eq!(record.rlen(), 0);
        assert_eq!(record.sample_count(), 0);
        assert_eq!(record.pos(), 0)
    }

    // Modifying a clone must not affect the original record.
    #[test]
    fn test_record_clone() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let alleles: &[&[u8]] = &[b"AGG", b"TG"];
        record.set_alleles(alleles).expect("Failed to set alleles");
        record.set_pos(6);

        let mut cloned_record = record.clone();
        cloned_record.set_pos(5);

        assert_eq!(record.pos(), 6);
        assert_eq!(record.allele_count(), 2);
        assert_eq!(cloned_record.pos(), 5);
        assert_eq!(cloned_record.allele_count(), 2);
    }

    // An empty record reports PASS (by name, as "." and as id 0) and no other filter.
    #[test]
    fn test_record_has_filter_pass_is_default() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let record = vcf.empty_record();

        assert!(record.has_filter("PASS".as_bytes()));
        assert!(record.has_filter(".".as_bytes()));
        assert!(record.has_filter(&Id(0)));
        assert!(!record.has_filter("foo".as_bytes()));
        assert!(!record.has_filter(&Id(2)));
    }

    // After pushing a custom filter, the record has it and no longer reports PASS.
    #[test]
    fn test_record_has_filter_custom() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        record.push_filter("foo".as_bytes()).unwrap();

        assert!(record.has_filter("foo".as_bytes()));
        assert!(!record.has_filter("PASS".as_bytes()))
    }

    // Filters can be pushed by name or by id; pushing an undeclared filter fails.
    #[test]
    fn test_record_push_filter() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="dranks">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert!(record.has_filter("PASS".as_bytes()));
        record.push_filter("foo".as_bytes()).unwrap();
        let bar = record.header().name_to_id(b"bar").unwrap();
        record.push_filter(&bar).unwrap();
        assert!(record.has_filter("foo".as_bytes()));
        assert!(record.has_filter(&bar));
        assert!(!record.has_filter("PASS".as_bytes()));
        assert!(record.push_filter("baz".as_bytes()).is_err())
    }

    // `set_filters` replaces all filters; an empty list restores PASS, and an
    // undeclared filter name is an error.
    #[test]
    fn test_record_set_filters() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        assert!(record.has_filter("PASS".as_bytes()));
        record
            .set_filters(&["foo".as_bytes(), "bar".as_bytes()])
            .unwrap();
        assert!(record.has_filter("foo".as_bytes()));
        assert!(record.has_filter("bar".as_bytes()));
        assert!(!record.has_filter("PASS".as_bytes()));
        let filters: &[&Id] = &[];
        record.set_filters(filters).unwrap();
        assert!(record.has_filter("PASS".as_bytes()));
        assert!(!record.has_filter("foo".as_bytes()));
        assert!(record
            .set_filters(&["foo".as_bytes(), "baz".as_bytes()])
            .is_err())
    }

    // Removing filters one by one; with the second argument `true`, removing the
    // last filter leaves the record reporting PASS.
    #[test]
    fn test_record_remove_filter() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        let foo = record.header().name_to_id(b"foo").unwrap();
        let bar = record.header().name_to_id(b"bar").unwrap();
        record.set_filters(&[&foo, &bar]).unwrap();
        assert!(record.has_filter(&foo));
        assert!(record.has_filter(&bar));
        record.remove_filter(&foo, true).unwrap();
        assert!(!record.has_filter(&foo));
        assert!(record.has_filter(&bar));
        assert!(record.remove_filter("baz".as_bytes(), true).is_err());
        record.remove_filter(&bar, true).unwrap();
        assert!(!record.has_filter(&bar));
        assert!(record.has_filter("PASS".as_bytes()));
    }

    // Formatting an empty record under a header without contigs must fail.
    #[test]
    fn test_record_to_vcf_string_err() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let header = Header::new();
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let record = vcf.empty_record();
        assert!(record.to_vcf_string().is_err());
    }

    // With a contig declared, an empty record with one filter formats to a full VCF line.
    #[test]
    fn test_record_to_vcf_string() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path();
        let mut header = Header::new();
        header.push_record(b"##contig=<ID=chr1,length=1000>");
        header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
        let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap();
        let mut record = vcf.empty_record();
        record.push_filter("foo".as_bytes()).unwrap();
        assert_eq!(
            record.to_vcf_string().unwrap(),
            "chr1\t1\t.\t.\t.\t0\tfoo\t.\n"
        );
    }
}
1853}