Skip to main content

fastqc_rust/sequence/
mod.rs

1pub mod bam;
2pub mod casava;
3pub mod fast5;
4pub mod fastq;
5pub mod group;
6
7pub use bam::open_sequence_file;
8pub use group::SequenceFileGroup;
9
/// One read taken from a sequence file: its identifier, base calls,
/// per-base quality characters, and filter flag.
///
/// Counterpart of the Java `Sequence.Sequence` class. Bases are kept as
/// ASCII bytes and are uppercased by [`Sequence::new`], matching the
/// `toUpperCase()` call in the Java constructor. Quality characters are
/// kept exactly as supplied.
#[derive(Debug, Clone)]
pub struct Sequence {
    /// Identifier of the record.
    pub id: String,
    /// Nucleotide bases as ASCII bytes (A, C, G, T, N), uppercased by `new`.
    pub sequence: Vec<u8>,
    /// Quality characters as raw ASCII bytes; no offset has been subtracted.
    pub quality: Vec<u8>,
    /// `true` when the record was marked as filtered (e.g. by CASAVA).
    pub is_filtered: bool,
    /// Colorspace form of the read for SOLiD data, when there is one.
    pub colorspace: Option<Vec<u8>>,
}

impl Sequence {
    /// Build a record from its parts, uppercasing the bases.
    ///
    /// `is_filtered` starts as `false` and `colorspace` as `None`.
    /// `quality` is stored untouched.
    pub fn new(id: String, sequence: Vec<u8>, quality: Vec<u8>) -> Self {
        let mut record = Self {
            id,
            sequence,
            quality,
            is_filtered: false,
            colorspace: None,
        };
        // Same normalization as the Java constructor's `toUpperCase()`;
        // done in place so the caller's buffer is reused rather than copied.
        record.sequence.make_ascii_uppercase();
        record
    }

    /// Number of bases in the record.
    pub fn len(&self) -> usize {
        let Self { sequence, .. } = self;
        sequence.len()
    }

    /// `true` when the record holds no bases.
    pub fn is_empty(&self) -> bool {
        let Self { sequence, .. } = self;
        sequence.is_empty()
    }
}
56
57/// Trait for reading sequences from various file formats.
58///
59/// Mirrors `Sequence.SequenceFile` interface.
60pub trait SequenceFile: Send {
61    /// Read the next sequence from the file, or None at EOF.
62    fn next(&mut self) -> Option<std::io::Result<Sequence>>;
63
64    /// The display name of this file (typically the filename).
65    fn name(&self) -> &str;
66
67    /// Whether this file contains colorspace data (SOLiD).
68    fn is_colorspace(&self) -> bool;
69
70    /// Estimated percentage complete (0.0 - 100.0), for progress display.
71    fn percent_complete(&self) -> f64;
72}