dryice/record.rs
1//! Sequencing record trait and owned record type.
2//!
3//! This module defines the two main record-facing abstractions in the
4//! crate: [`SeqRecordLike`], the trait that any sequencing record type
5//! can implement to be written into a `dryice` file, and [`SeqRecord`],
6//! the crate-provided owned row-wise record type returned by the reader.
7
8use crate::error::DryIceError;
9
/// Borrowed view of a sequencing read: a name, a sequence, and quality bytes.
///
/// This trait is the write-side interoperability boundary of `dryice`.
/// A type only has to hand out borrowed byte slices for its three fields
/// to be written into a `dryice` file directly, with no conversion into
/// a crate-owned record type first.
///
/// # Example
///
/// ```
/// use dryice::SeqRecordLike;
///
/// struct MyRecord {
///     name: Vec<u8>,
///     seq: Vec<u8>,
///     qual: Vec<u8>,
/// }
///
/// impl SeqRecordLike for MyRecord {
///     fn name(&self) -> &[u8] { &self.name }
///     fn sequence(&self) -> &[u8] { &self.seq }
///     fn quality(&self) -> &[u8] { &self.qual }
/// }
/// ```
pub trait SeqRecordLike {
    /// Bytes of the record's name or identifier.
    fn name(&self) -> &[u8];

    /// Bytes of the nucleotide sequence.
    fn sequence(&self) -> &[u8];

    /// Bytes of the per-base quality scores.
    fn quality(&self) -> &[u8];

    /// Number of bytes in the sequence.
    ///
    /// The default implementation measures the slice returned by
    /// [`sequence`](Self::sequence).
    fn len(&self) -> usize {
        let bases = self.sequence();
        bases.len()
    }

    /// Returns `true` when the sequence contains no bytes.
    ///
    /// The default implementation inspects the slice returned by
    /// [`sequence`](Self::sequence).
    fn is_empty(&self) -> bool {
        let bases = self.sequence();
        bases.is_empty()
    }
}
54
/// Owned sequencing record stored row-wise as three byte vectors.
///
/// `SeqRecord` is the main read-side output type of `dryice`: the
/// reader's record iterator yields it, and it can also be built by hand
/// for tests or interop.
///
/// All fields are private and reached through accessor methods. Records
/// are created through constructors that uphold the type's invariants,
/// such as the sequence and quality having the same length.
///
/// Because `SeqRecord` implements [`SeqRecordLike`], a record can be
/// handed back to a writer without any conversion.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SeqRecord {
    // Record name / identifier bytes.
    name: Vec<u8>,
    // Nucleotide sequence bytes.
    sequence: Vec<u8>,
    // Per-base quality bytes.
    quality: Vec<u8>,
}
73
74impl SeqRecord {
75 /// Create a new record from owned byte vectors.
76 ///
77 /// # Errors
78 ///
79 /// Returns [`DryIceError::MismatchedSequenceAndQualityLengths`] if
80 /// the sequence and quality vectors have different lengths.
81 pub fn new(name: Vec<u8>, sequence: Vec<u8>, quality: Vec<u8>) -> Result<Self, DryIceError> {
82 if sequence.len() != quality.len() {
83 return Err(DryIceError::MismatchedSequenceAndQualityLengths {
84 sequence_len: sequence.len(),
85 quality_len: quality.len(),
86 });
87 }
88
89 Ok(Self {
90 name,
91 sequence,
92 quality,
93 })
94 }
95
96 /// Create a new record by copying from byte slices.
97 ///
98 /// # Errors
99 ///
100 /// Returns [`DryIceError::MismatchedSequenceAndQualityLengths`] if
101 /// the sequence and quality slices have different lengths.
102 pub fn from_slices(name: &[u8], sequence: &[u8], quality: &[u8]) -> Result<Self, DryIceError> {
103 Self::new(name.to_vec(), sequence.to_vec(), quality.to_vec())
104 }
105
106 /// The record name or identifier.
107 #[must_use]
108 pub fn name(&self) -> &[u8] {
109 &self.name
110 }
111
112 /// The nucleotide sequence.
113 #[must_use]
114 pub fn sequence(&self) -> &[u8] {
115 &self.sequence
116 }
117
118 /// The per-base quality scores.
119 #[must_use]
120 pub fn quality(&self) -> &[u8] {
121 &self.quality
122 }
123
124 /// Consume the record and return the name bytes.
125 #[must_use]
126 pub fn into_name(self) -> Vec<u8> {
127 self.name
128 }
129
130 /// Consume the record and return the sequence bytes.
131 #[must_use]
132 pub fn into_sequence(self) -> Vec<u8> {
133 self.sequence
134 }
135
136 /// Consume the record and return the quality bytes.
137 #[must_use]
138 pub fn into_quality(self) -> Vec<u8> {
139 self.quality
140 }
141
142 /// Consume the record and return all three fields.
143 #[must_use]
144 pub fn into_parts(self) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
145 (self.name, self.sequence, self.quality)
146 }
147
148 /// The record name as a UTF-8 string, if valid.
149 ///
150 /// # Errors
151 ///
152 /// Returns [`std::str::Utf8Error`] if the name bytes are not valid UTF-8.
153 pub fn name_str(&self) -> Result<&str, std::str::Utf8Error> {
154 std::str::from_utf8(&self.name)
155 }
156
157 /// The nucleotide sequence as a UTF-8 string, if valid.
158 ///
159 /// # Errors
160 ///
161 /// Returns [`std::str::Utf8Error`] if the sequence bytes are not valid UTF-8.
162 pub fn sequence_str(&self) -> Result<&str, std::str::Utf8Error> {
163 std::str::from_utf8(&self.sequence)
164 }
165
166 /// The quality scores as a UTF-8 string, if valid.
167 ///
168 /// # Errors
169 ///
170 /// Returns [`std::str::Utf8Error`] if the quality bytes are not valid UTF-8.
171 pub fn quality_str(&self) -> Result<&str, std::str::Utf8Error> {
172 std::str::from_utf8(&self.quality)
173 }
174}
175
176impl std::fmt::Display for SeqRecord {
177 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
178 let name = std::str::from_utf8(&self.name).unwrap_or("<non-utf8>");
179 let seq = std::str::from_utf8(&self.sequence).unwrap_or("<non-utf8>");
180 write!(f, "{name}\t{seq}\t({} bp)", self.sequence.len())
181 }
182}
183
184impl SeqRecordLike for SeqRecord {
185 fn name(&self) -> &[u8] {
186 self.name()
187 }
188
189 fn sequence(&self) -> &[u8] {
190 self.sequence()
191 }
192
193 fn quality(&self) -> &[u8] {
194 self.quality()
195 }
196}
197
198/// Extension trait providing convenience methods for any [`SeqRecordLike`]
199/// implementor.
200///
201/// This trait is automatically implemented for all types that implement
202/// `SeqRecordLike`. It provides higher-level operations such as
203/// conversion into the crate's owned [`SeqRecord`] type.
204pub trait SeqRecordExt: SeqRecordLike {
205 /// Convert this record into an owned [`SeqRecord`] by copying the
206 /// field data.
207 ///
208 /// # Errors
209 ///
210 /// Returns [`DryIceError::MismatchedSequenceAndQualityLengths`] if
211 /// the sequence and quality slices have different lengths.
212 fn to_seq_record(&self) -> Result<SeqRecord, DryIceError> {
213 SeqRecord::from_slices(self.name(), self.sequence(), self.quality())
214 }
215}
216
217impl<T: SeqRecordLike + ?Sized> SeqRecordExt for T {}
218
/// Read-like record that carries no payload: its name, sequence, and
/// quality are all empty.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct EmptyRecord;
222
223impl SeqRecordLike for EmptyRecord {
224 fn name(&self) -> &[u8] {
225 b""
226 }
227
228 fn sequence(&self) -> &[u8] {
229 b""
230 }
231
232 fn quality(&self) -> &[u8] {
233 b""
234 }
235}
236
237/// Shared empty record value for key-only writes and tests.
238pub const EMPTY_RECORD: EmptyRecord = EmptyRecord;