bam_builder/
sequence_dict.rs

//! Wrapper over [`rust_htslib::bam::header`], see [`SequenceDict`].
//!
//! The intention is to turn this into something like Picard's SequenceDictionary at
//! some future point.
5#![warn(missing_docs)]
6use rust_htslib::bam::header::{Header, HeaderRecord};
7
/// Holds relevant info about the reference sequences; translates into a header.
///
/// Converting a `SequenceDict` into a [`Header`] emits one `SQ` record per entry of
/// [`sequences`](SequenceDict::sequences), in the order they are stored.
///
/// # Example
/// ```
/// use bam_builder::sequence_dict::{SequenceData, SequenceDict};
///
/// let sequences: Vec<SequenceData> = (1..=22)
///     .map(|chr| chr.to_string())
///     .chain(vec!["X", "Y", "M"].into_iter().map(|chr| chr.to_string()))
///     .map(|chr| SequenceData {
///         name: format!("chr{}", chr),
///         length: 2_000_000,
///     })
///     .collect();
/// let sd = SequenceDict { sequences };
/// assert_eq!(sd.sequences.len(), 25);
/// assert_eq!(sd.sequences[0].name, "chr1");
/// assert_eq!(sd, sd.clone());
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SequenceDict {
    /// Ordered list of [`SequenceData`].
    pub sequences: Vec<SequenceData>,
}

/// Holds relevant information about a single reference sequence.
// Note: At a future point spin this out into its own lib and support Sequence Dict
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SequenceData {
    /// Name of the sequence (written as the `SN` tag of the header record).
    pub name: String,
    /// Length of the sequence (written as the `LN` tag of the header record).
    pub length: usize,
}
39
40impl From<SequenceData> for HeaderRecord<'_> {
41    fn from(meta: SequenceData) -> Self {
42        let mut rec = HeaderRecord::new("SQ".as_bytes());
43        rec.push_tag("SN".as_bytes(), &meta.name);
44        rec.push_tag("LN".as_bytes(), meta.length);
45        rec
46    }
47}
48
49impl Default for SequenceDict {
50    fn default() -> Self {
51        let sequences = (1..22)
52            .map(|chr| chr.to_string())
53            .chain(vec!["X", "Y", "M"].into_iter().map(|chr| chr.to_string()))
54            .map(|chr| SequenceData {
55                name: format!("chr{}", chr),
56                length: 2_000_000,
57            })
58            .collect();
59        SequenceDict { sequences }
60    }
61}
62
63impl From<SequenceDict> for Header {
64    fn from(meta: SequenceDict) -> Header {
65        let mut header = Header::new();
66        for seq in meta.sequences.into_iter() {
67            header.push_record(&seq.into());
68        }
69        header
70    }
71}