fastqc_rust/sequence/mod.rs
1pub mod bam;
2pub mod casava;
3pub mod fast5;
4pub mod fastq;
5pub mod group;
6
7pub use bam::open_sequence_file;
8pub use group::SequenceFileGroup;
9
/// One sequencing read: its identifier, base calls, quality string and flags.
///
/// Counterpart of the Java `Sequence.Sequence` class. Bases are held as
/// upper-case ASCII bytes (the Java constructor applies `toUpperCase()`),
/// while quality values are held as the raw ASCII characters.
#[derive(Debug, Clone)]
pub struct Sequence {
    /// Identifier of the record.
    pub id: String,
    /// Nucleotide bases (A, C, G, T, N) as ASCII bytes, upper-cased by [`Sequence::new`].
    pub sequence: Vec<u8>,
    /// Quality characters as raw ASCII; no offset adjustment has been applied yet.
    pub quality: Vec<u8>,
    /// `true` when the record was flagged as filtered (for example by CASAVA).
    pub is_filtered: bool,
    /// Colorspace form of the read, present only where applicable (SOLiD data).
    pub colorspace: Option<Vec<u8>>,
}

impl Sequence {
    /// Builds a record from its parts, upper-casing the bases on the way in.
    ///
    /// This follows the Java constructor, which calls `toUpperCase()` on the
    /// sequence string. The new record starts with `is_filtered` set to
    /// `false` and `colorspace` set to `None`.
    pub fn new(id: String, mut sequence: Vec<u8>, quality: Vec<u8>) -> Self {
        // Upper-case every byte where it sits so the caller's buffer is reused
        // and no second Vec is allocated. Only ASCII letters are affected.
        sequence.iter_mut().for_each(u8::make_ascii_uppercase);
        Sequence {
            is_filtered: false,
            colorspace: None,
            id,
            sequence,
            quality,
        }
    }

    /// Number of bases held in `sequence`.
    pub fn len(&self) -> usize {
        let bases: &[u8] = &self.sequence;
        bases.len()
    }

    /// Returns `true` when the record holds no bases at all.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
56
57/// Trait for reading sequences from various file formats.
58///
59/// Mirrors `Sequence.SequenceFile` interface.
60pub trait SequenceFile: Send {
61 /// Read the next sequence from the file, or None at EOF.
62 fn next(&mut self) -> Option<std::io::Result<Sequence>>;
63
64 /// The display name of this file (typically the filename).
65 fn name(&self) -> &str;
66
67 /// Whether this file contains colorspace data (SOLiD).
68 fn is_colorspace(&self) -> bool;
69
70 /// Estimated percentage complete (0.0 - 100.0), for progress display.
71 fn percent_complete(&self) -> f64;
72}