1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
//! Alignment record data field tag.

use std::{borrow::Borrow, fmt};

use bstr::ByteSlice;

/// An alignment record data field tag.
///
/// A tag is stored as its two raw bytes, e.g., `NH` is held as `[b'N', b'H']`. Equality,
/// ordering, and hashing are derived from those bytes.
#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Tag([u8; 2]);

impl Tag {
    /// The smallest template-independent mapping quality in the template (`AM`).
    pub const MIN_MAPPING_QUALITY: Self = Self::new(b'A', b'M');

    /// Alignment score generated by aligner (`AS`).
    pub const ALIGNMENT_SCORE: Self = Self::new(b'A', b'S');

    /// Barcode sequence identifying the sample (`BC`).
    pub const SAMPLE_BARCODE_SEQUENCE: Self = Self::new(b'B', b'C');

    /// Offset to base alignment quality (BAQ) (`BQ`).
    pub const BASE_ALIGNMENT_QUALITY_OFFSETS: Self = Self::new(b'B', b'Q');

    /// Phred quality of the unique molecular barcode bases in the `OX` tag (`BZ`).
    pub const ORIGINAL_UMI_QUALITY_SCORES: Self = Self::new(b'B', b'Z');

    /// Cell identifier (`CB`).
    pub const CELL_BARCODE_ID: Self = Self::new(b'C', b'B');

    /// Reference name of the next hit (`CC`).
    pub const NEXT_HIT_REFERENCE_SEQUENCE_NAME: Self = Self::new(b'C', b'C');

    /// BAM only: `CIGAR` in BAM's binary encoding if (and only if) it consists of > 65535 operators
    /// (`CG`).
    pub const CIGAR: Self = Self::new(b'C', b'G');

    /// Edit distance between the color sequence and the color reference (see also `NM`) (`CM`).
    pub const COLOR_EDIT_DISTANCE: Self = Self::new(b'C', b'M');

    /// Free-text comments (`CO`).
    pub const COMMENT: Self = Self::new(b'C', b'O');

    /// Leftmost coordinate of the next hit (`CP`).
    pub const NEXT_HIT_POSITION: Self = Self::new(b'C', b'P');

    /// Color read base qualities (`CQ`).
    pub const COLOR_QUALITY_SCORES: Self = Self::new(b'C', b'Q');

    /// Cellular barcode sequence bases (uncorrected) (`CR`).
    pub const CELL_BARCODE_SEQUENCE: Self = Self::new(b'C', b'R');

    /// Color read sequence (`CS`).
    pub const COLOR_SEQUENCE: Self = Self::new(b'C', b'S');

    /// Complete read annotation tag, used for consensus annotation dummy features (`CT`).
    pub const COMPLETE_READ_ANNOTATIONS: Self = Self::new(b'C', b'T');

    /// Phred quality of the cellular barcode sequence in the `CR` tag (`CY`).
    pub const CELL_BARCODE_QUALITY_SCORES: Self = Self::new(b'C', b'Y');

    /// The 2nd most likely base calls (`E2`).
    pub const NEXT_HIT_SEQUENCE: Self = Self::new(b'E', b'2');

    /// The index of segment in the template (`FI`).
    pub const SEGMENT_INDEX: Self = Self::new(b'F', b'I');

    /// Segment suffix (`FS`).
    pub const SEGMENT_SUFFIX: Self = Self::new(b'F', b'S');

    /// Flow signal intensities (`FZ`).
    pub const ALTERNATIVE_SEQUENCE: Self = Self::new(b'F', b'Z');

    /// Reserved for backwards compatibility reasons (`GC`).
    pub const RESERVED_GC: Self = Self::new(b'G', b'C');

    /// Reserved for backwards compatibility reasons (`GQ`).
    pub const RESERVED_GQ: Self = Self::new(b'G', b'Q');

    /// Reserved for backwards compatibility reasons (`GS`).
    pub const RESERVED_GS: Self = Self::new(b'G', b'S');

    /// Number of perfect hits (`H0`).
    pub const PERFECT_HIT_COUNT: Self = Self::new(b'H', b'0');

    /// Number of 1-difference hits (see also `NM`) (`H1`).
    pub const ONE_DIFFERENCE_HIT_COUNT: Self = Self::new(b'H', b'1');

    /// Number of 2-difference hits (`H2`).
    pub const TWO_DIFFERENCE_HIT_COUNT: Self = Self::new(b'H', b'2');

    /// Query hit index (`HI`).
    pub const HIT_INDEX: Self = Self::new(b'H', b'I');

    /// Query hit total count (`IH`).
    pub const TOTAL_HIT_COUNT: Self = Self::new(b'I', b'H');

    /// Library (`LB`).
    pub const LIBRARY: Self = Self::new(b'L', b'B');

    /// CIGAR string for mate/next segment (`MC`).
    pub const MATE_CIGAR: Self = Self::new(b'M', b'C');

    /// String encoding mismatched and deleted reference bases (`MD`).
    pub const MISMATCHED_POSITIONS: Self = Self::new(b'M', b'D');

    /// Reserved for backwards compatibility reasons (`MF`).
    pub const RESERVED_MF: Self = Self::new(b'M', b'F');

    /// Molecular identifier (`MI`).
    ///
    /// A string that uniquely identifies the molecule from which the record was derived.
    pub const UMI_ID: Self = Self::new(b'M', b'I');

    /// Base modification probabilities (`ML`).
    pub const BASE_MODIFICATION_PROBABILITIES: Self = Self::new(b'M', b'L');

    /// Base modifications / methylation (`MM`).
    pub const BASE_MODIFICATIONS: Self = Self::new(b'M', b'M');

    /// Length of sequence at the time `MM` and `ML` were produced (`MN`).
    pub const BASE_MODIFICATION_SEQUENCE_LENGTH: Self = Self::new(b'M', b'N');

    /// Mapping quality of the mate/next segment (`MQ`).
    pub const MATE_MAPPING_QUALITY: Self = Self::new(b'M', b'Q');

    /// Number of reported alignments that contain the query in the current record (`NH`).
    pub const ALIGNMENT_HIT_COUNT: Self = Self::new(b'N', b'H');

    /// Edit distance to the reference (`NM`).
    pub const EDIT_DISTANCE: Self = Self::new(b'N', b'M');

    /// Original alignment (`OA`).
    pub const ORIGINAL_ALIGNMENT: Self = Self::new(b'O', b'A');

    /// Original CIGAR (deprecated; use `OA` instead) (`OC`).
    pub const ORIGINAL_CIGAR: Self = Self::new(b'O', b'C');

    /// Original mapping position (deprecated; use `OA` instead) (`OP`).
    pub const ORIGINAL_POSITION: Self = Self::new(b'O', b'P');

    /// Original base quality (`OQ`).
    pub const ORIGINAL_QUALITY_SCORES: Self = Self::new(b'O', b'Q');

    /// Original unique molecular barcode bases (`OX`).
    pub const ORIGINAL_UMI_BARCODE_SEQUENCE: Self = Self::new(b'O', b'X');

    /// Program (`PG`).
    pub const PROGRAM: Self = Self::new(b'P', b'G');

    /// Phred likelihood of the template (`PQ`).
    pub const TEMPLATE_LIKELIHOOD: Self = Self::new(b'P', b'Q');

    /// Read annotations for parse of the padded read sequence (`PT`).
    pub const PADDED_READ_ANNOTATIONS: Self = Self::new(b'P', b'T');

    /// Platform unit (`PU`).
    pub const PLATFORM_UNIT: Self = Self::new(b'P', b'U');

    /// Phred quality of the mate/next segment sequence in the `R2` tag (`Q2`).
    pub const MATE_QUALITY_SCORES: Self = Self::new(b'Q', b'2');

    /// Phred quality of the sample barcode sequence in the `BC` tag (`QT`).
    pub const SAMPLE_BARCODE_QUALITY_SCORES: Self = Self::new(b'Q', b'T');

    /// Quality score of the unique molecular identifier in the `RX` tag (`QX`).
    pub const UMI_QUALITY_SCORES: Self = Self::new(b'Q', b'X');

    /// Sequence of the mate/next segment in the template (`R2`).
    pub const MATE_SEQUENCE: Self = Self::new(b'R', b'2');

    /// Read group (`RG`).
    pub const READ_GROUP: Self = Self::new(b'R', b'G');

    /// Reserved for backwards compatibility reasons (`RT`).
    pub const RESERVED_RT: Self = Self::new(b'R', b'T');

    /// Sequence bases of the (possibly corrected) unique molecular identifier (`RX`).
    pub const UMI_SEQUENCE: Self = Self::new(b'R', b'X');

    /// Reserved for backwards compatibility reasons (`S2`).
    pub const RESERVED_S2: Self = Self::new(b'S', b'2');

    /// Other canonical alignments in a chimeric alignment (`SA`).
    pub const OTHER_ALIGNMENTS: Self = Self::new(b'S', b'A');

    /// Template-independent mapping quality (`SM`).
    pub const TEMPLATE_MAPPING_QUALITY: Self = Self::new(b'S', b'M');

    /// Reserved for backwards compatibility reasons (`SQ`).
    pub const RESERVED_SQ: Self = Self::new(b'S', b'Q');

    /// The number of segments in the template (`TC`).
    pub const SEGMENT_COUNT: Self = Self::new(b'T', b'C');

    /// Transcript strand (`TS`).
    pub const TRANSCRIPT_STRAND: Self = Self::new(b'T', b'S');

    /// Phred probability of the 2nd call being wrong conditional on the best being wrong (`U2`).
    pub const NEXT_HIT_QUALITY_SCORES: Self = Self::new(b'U', b'2');

    /// Phred likelihood of the segment, conditional on the mapping being correct (`UQ`).
    pub const SEGMENT_LIKELIHOOD: Self = Self::new(b'U', b'Q');

    /// Creates a tag from its two raw bytes.
    ///
    /// `a` is stored as the first byte and `b` as the second. The bytes are stored as given; no
    /// validation is performed. Being a `const fn`, this can also be used to define tag
    /// constants.
    ///
    /// # Examples
    ///
    /// ```
    /// use noodles_sam::alignment::record::data::field::Tag;
    /// let tag = Tag::new(b'x', b'n');
    /// ```
    pub const fn new(a: u8, b: u8) -> Self {
        Self([a, b])
    }
}

impl fmt::Debug for Tag {
    /// Writes the tag as a `Tag` tuple whose single field is the tag's two bytes viewed as a
    /// byte string, e.g., `Tag("NH")`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let raw: &[u8; 2] = self.as_ref();

        let mut tuple = f.debug_tuple("Tag");
        tuple.field(&raw.as_bstr());
        tuple.finish()
    }
}

impl AsRef<[u8; 2]> for Tag {
    fn as_ref(&self) -> &[u8; 2] {
        &self.0
    }
}

impl Borrow<[u8; 2]> for Tag {
    /// Borrows the tag as its two raw bytes.
    fn borrow(&self) -> &[u8; 2] {
        &self.0
    }
}

impl From<[u8; 2]> for Tag {
    /// Wraps two raw bytes as a tag, keeping their order.
    fn from(buf: [u8; 2]) -> Self {
        let [first, second] = buf;
        Self([first, second])
    }
}

impl PartialEq<[u8; 2]> for Tag {
    /// Returns whether the tag's raw bytes equal `other`.
    fn eq(&self, other: &[u8; 2]) -> bool {
        self.0 == *other
    }
}

impl From<Tag> for [u8; 2] {
    fn from(tag: Tag) -> Self {
        tag.0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // The debug output wraps the tag bytes, shown as a quoted string, in `Tag(...)`.
    #[test]
    fn test_fmt_debug() {
        let actual = format!("{:?}", Tag::ALIGNMENT_HIT_COUNT);
        let expected = r#"Tag("NH")"#;
        assert_eq!(actual, expected);
    }
}