Skip to main content

dryice/
record.rs

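//! Sequencing record trait and owned record type.
//!
//! This module defines the two main record-facing abstractions in the
//! crate: [`SeqRecordLike`], the trait that any sequencing record type
//! can implement to be written into a `dryice` file, and [`SeqRecord`],
//! the crate-provided owned row-wise record type returned by the reader.
//!
//! It also provides [`SeqRecordExt`], an extension trait with conversion
//! helpers for every [`SeqRecordLike`] implementor, and [`EmptyRecord`],
//! a record whose name, sequence, and quality are all empty.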
7
8use crate::error::DryIceError;
9
/// Read-like sequencing record exposing name, sequence, and quality bytes.
///
/// `dryice` uses this trait as its primary write-side interoperability
/// boundary: a type that can hand out borrowed byte slices for its name,
/// sequence, and quality fields may implement it and be written into a
/// `dryice` file directly, with no conversion into a crate-owned type.
///
/// Only the three accessors are required. [`len`](Self::len) and
/// [`is_empty`](Self::is_empty) come with default implementations that
/// are derived from [`sequence`](Self::sequence).
///
/// # Examples
///
/// ```
/// use dryice::SeqRecordLike;
///
/// struct Read {
///     id: Vec<u8>,
///     bases: Vec<u8>,
///     quals: Vec<u8>,
/// }
///
/// impl SeqRecordLike for Read {
///     fn name(&self) -> &[u8] { &self.id }
///     fn sequence(&self) -> &[u8] { &self.bases }
///     fn quality(&self) -> &[u8] { &self.quals }
/// }
///
/// let read = Read {
///     id: b"r1".to_vec(),
///     bases: b"ACGT".to_vec(),
///     quals: b"IIII".to_vec(),
/// };
/// assert_eq!(read.len(), 4);
/// assert!(!read.is_empty());
/// ```
pub trait SeqRecordLike {
    /// Borrow the record's name (identifier) bytes.
    fn name(&self) -> &[u8];

    /// Borrow the nucleotide sequence bytes.
    fn sequence(&self) -> &[u8];

    /// Borrow the per-base quality score bytes.
    fn quality(&self) -> &[u8];

    /// Number of bytes in the sequence, as reported by [`sequence`](Self::sequence).
    fn len(&self) -> usize {
        let bases = self.sequence();
        bases.len()
    }

    /// `true` when [`sequence`](Self::sequence) yields an empty slice.
    fn is_empty(&self) -> bool {
        let bases = self.sequence();
        bases.is_empty()
    }
}
54
55/// An owned, row-wise sequencing record.
56///
57/// This is the primary read-side output type for `dryice`. It is returned
58/// by the reader's record iterator and can also be constructed directly
59/// for testing or interop purposes.
60///
61/// Fields are private and accessed through methods. Construction goes
62/// through invariant-preserving constructors that enforce constraints
63/// such as matching sequence and quality lengths.
64///
65/// `SeqRecord` implements [`SeqRecordLike`], so it can be passed back
66/// into a writer without conversion.
67#[derive(Debug, Clone, PartialEq, Eq)]
68pub struct SeqRecord {
69    name: Vec<u8>,
70    sequence: Vec<u8>,
71    quality: Vec<u8>,
72}
73
74impl SeqRecord {
75    /// Create a new record from owned byte vectors.
76    ///
77    /// # Errors
78    ///
79    /// Returns [`DryIceError::MismatchedSequenceAndQualityLengths`] if
80    /// the sequence and quality vectors have different lengths.
81    pub fn new(name: Vec<u8>, sequence: Vec<u8>, quality: Vec<u8>) -> Result<Self, DryIceError> {
82        if sequence.len() != quality.len() {
83            return Err(DryIceError::MismatchedSequenceAndQualityLengths {
84                sequence_len: sequence.len(),
85                quality_len: quality.len(),
86            });
87        }
88
89        Ok(Self {
90            name,
91            sequence,
92            quality,
93        })
94    }
95
96    /// Create a new record by copying from byte slices.
97    ///
98    /// # Errors
99    ///
100    /// Returns [`DryIceError::MismatchedSequenceAndQualityLengths`] if
101    /// the sequence and quality slices have different lengths.
102    pub fn from_slices(name: &[u8], sequence: &[u8], quality: &[u8]) -> Result<Self, DryIceError> {
103        Self::new(name.to_vec(), sequence.to_vec(), quality.to_vec())
104    }
105
106    /// The record name or identifier.
107    #[must_use]
108    pub fn name(&self) -> &[u8] {
109        &self.name
110    }
111
112    /// The nucleotide sequence.
113    #[must_use]
114    pub fn sequence(&self) -> &[u8] {
115        &self.sequence
116    }
117
118    /// The per-base quality scores.
119    #[must_use]
120    pub fn quality(&self) -> &[u8] {
121        &self.quality
122    }
123
124    /// Consume the record and return the name bytes.
125    #[must_use]
126    pub fn into_name(self) -> Vec<u8> {
127        self.name
128    }
129
130    /// Consume the record and return the sequence bytes.
131    #[must_use]
132    pub fn into_sequence(self) -> Vec<u8> {
133        self.sequence
134    }
135
136    /// Consume the record and return the quality bytes.
137    #[must_use]
138    pub fn into_quality(self) -> Vec<u8> {
139        self.quality
140    }
141
142    /// Consume the record and return all three fields.
143    #[must_use]
144    pub fn into_parts(self) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
145        (self.name, self.sequence, self.quality)
146    }
147
148    /// The record name as a UTF-8 string, if valid.
149    ///
150    /// # Errors
151    ///
152    /// Returns [`std::str::Utf8Error`] if the name bytes are not valid UTF-8.
153    pub fn name_str(&self) -> Result<&str, std::str::Utf8Error> {
154        std::str::from_utf8(&self.name)
155    }
156
157    /// The nucleotide sequence as a UTF-8 string, if valid.
158    ///
159    /// # Errors
160    ///
161    /// Returns [`std::str::Utf8Error`] if the sequence bytes are not valid UTF-8.
162    pub fn sequence_str(&self) -> Result<&str, std::str::Utf8Error> {
163        std::str::from_utf8(&self.sequence)
164    }
165
166    /// The quality scores as a UTF-8 string, if valid.
167    ///
168    /// # Errors
169    ///
170    /// Returns [`std::str::Utf8Error`] if the quality bytes are not valid UTF-8.
171    pub fn quality_str(&self) -> Result<&str, std::str::Utf8Error> {
172        std::str::from_utf8(&self.quality)
173    }
174}
175
176impl std::fmt::Display for SeqRecord {
177    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
178        let name = std::str::from_utf8(&self.name).unwrap_or("<non-utf8>");
179        let seq = std::str::from_utf8(&self.sequence).unwrap_or("<non-utf8>");
180        write!(f, "{name}\t{seq}\t({} bp)", self.sequence.len())
181    }
182}
183
184impl SeqRecordLike for SeqRecord {
185    fn name(&self) -> &[u8] {
186        self.name()
187    }
188
189    fn sequence(&self) -> &[u8] {
190        self.sequence()
191    }
192
193    fn quality(&self) -> &[u8] {
194        self.quality()
195    }
196}
197
198/// Extension trait providing convenience methods for any [`SeqRecordLike`]
199/// implementor.
200///
201/// This trait is automatically implemented for all types that implement
202/// `SeqRecordLike`. It provides higher-level operations such as
203/// conversion into the crate's owned [`SeqRecord`] type.
204pub trait SeqRecordExt: SeqRecordLike {
205    /// Convert this record into an owned [`SeqRecord`] by copying the
206    /// field data.
207    ///
208    /// # Errors
209    ///
210    /// Returns [`DryIceError::MismatchedSequenceAndQualityLengths`] if
211    /// the sequence and quality slices have different lengths.
212    fn to_seq_record(&self) -> Result<SeqRecord, DryIceError> {
213        SeqRecord::from_slices(self.name(), self.sequence(), self.quality())
214    }
215}
216
217impl<T: SeqRecordLike + ?Sized> SeqRecordExt for T {}
218
219/// A zero-payload read-like record with empty name, sequence, and quality.
220#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
221pub struct EmptyRecord;
222
223impl SeqRecordLike for EmptyRecord {
224    fn name(&self) -> &[u8] {
225        b""
226    }
227
228    fn sequence(&self) -> &[u8] {
229        b""
230    }
231
232    fn quality(&self) -> &[u8] {
233        b""
234    }
235}
236
237/// Shared empty record value for key-only writes and tests.
238pub const EMPTY_RECORD: EmptyRecord = EmptyRecord;