Skip to main content

holodeck_lib/output/
fastq.rs

1//! FASTQ output for simulated reads.
2//!
3//! Writes bgzf-compressed FASTQ files. Supports both single-threaded
4//! compression (via noodles-bgzf) and multi-threaded compression (via
5//! pooled-writer) depending on how the writer is constructed.
6
7use std::fs::File;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
11use anyhow::{Context, Result};
12use noodles_bgzf as bgzf;
13
14use crate::read::SimulatedRead;
15
/// Sink for FASTQ records backed by a BGZF-compressing writer.
///
/// `FastqWriter` only formats records; block framing and compression are
/// delegated to whatever writer it owns.  Two ways to obtain one:
///
/// * [`new`](Self::new) opens a file and compresses on the calling thread.
/// * [`from_writer`](Self::from_writer) wraps a writer the caller has
///   already configured (e.g. a [`pooled_writer::PooledWriter`] for
///   multi-threaded compression).
pub struct FastqWriter {
    /// Type-erased destination; every record byte is written through this.
    writer: Box<dyn Write>,
}
25
26impl FastqWriter {
27    /// Create a new single-threaded FASTQ writer at the given path.
28    ///
29    /// Uses noodles-bgzf with the specified compression level (0-12).
30    ///
31    /// # Errors
32    /// Returns an error if the file cannot be created or the compression
33    /// level is invalid.
34    pub fn new(path: &Path, compression: u8) -> Result<Self> {
35        let file = File::create(path)
36            .with_context(|| format!("Failed to create FASTQ file: {}", path.display()))?;
37        let level = bgzf::io::writer::CompressionLevel::new(compression)
38            .ok_or_else(|| anyhow::anyhow!("invalid compression level: {compression}"))?;
39        let writer = bgzf::io::writer::Builder::default()
40            .set_compression_level(level)
41            .build_from_writer(BufWriter::new(file));
42        Ok(Self { writer: Box::new(writer) })
43    }
44
45    /// Create a FASTQ writer from an existing writer that handles BGZF
46    /// compression (e.g. a [`pooled_writer::PooledWriter`]).
47    pub fn from_writer(writer: impl Write + 'static) -> Self {
48        Self { writer: Box::new(writer) }
49    }
50
51    /// Write a single read as a FASTQ record.
52    ///
53    /// Writes the four-line FASTQ format:
54    /// ```text
55    /// @read_name
56    /// BASES
57    /// +
58    /// QUALITIES
59    /// ```
60    ///
61    /// # Errors
62    /// Returns an error if writing fails.
63    pub fn write_read(&mut self, read: &SimulatedRead) -> Result<()> {
64        self.writer.write_all(b"@")?;
65        self.writer.write_all(read.name.as_bytes())?;
66        self.writer.write_all(b"\n")?;
67        self.writer.write_all(&read.bases)?;
68        self.writer.write_all(b"\n+\n")?;
69        self.writer.write_all(&read.qualities)?;
70        self.writer.write_all(b"\n")?;
71        Ok(())
72    }
73
74    /// Finalize the FASTQ file.
75    ///
76    /// Drops the underlying writer, which flushes any buffered data and
77    /// (for BGZF writers) writes the EOF marker.  For pooled writers, this
78    /// sends remaining data to the compression pool.
79    pub fn close(self) {
80        drop(self.writer);
81    }
82}
83
#[cfg(test)]
mod tests {
    use std::io::Read;

    use super::*;
    use crate::read::SimulatedRead;

    /// Build a `SimulatedRead` from a name plus raw base/quality bytes.
    fn make_read(name: &str, bases: &[u8], qualities: &[u8]) -> SimulatedRead {
        SimulatedRead {
            name: name.to_string(),
            bases: bases.to_vec(),
            qualities: qualities.to_vec(),
        }
    }

    /// Write `reads` to a fresh FASTQ file at `path` (compression level 1)
    /// and close the writer so the file is complete before it is read back.
    fn write_fastq(path: &Path, reads: &[SimulatedRead]) {
        let mut fastq = FastqWriter::new(path, 1).unwrap();
        for read in reads {
            fastq.write_read(read).unwrap();
        }
        fastq.close();
    }

    /// Decompress the whole file at `path` and return it as text.
    fn read_back(path: &Path) -> String {
        let handle = File::open(path).unwrap();
        let mut text = String::new();
        flate2::read::MultiGzDecoder::new(handle)
            .read_to_string(&mut text)
            .unwrap();
        text
    }

    #[test]
    fn test_fastq_write_and_read_back() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.fastq.gz");

        // Two records, written in order.
        let reads = [
            make_read("read1", b"ACGT", b"IIII"),
            make_read("read2", b"TTAA", b"????"),
        ];
        write_fastq(&path, &reads);

        // The decompressed stream must be both records back to back.
        assert_eq!(read_back(&path), "@read1\nACGT\n+\nIIII\n@read2\nTTAA\n+\n????\n");
    }

    #[test]
    fn test_fastq_write_empty_read() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("empty.fastq.gz");

        // A read with no bases still yields all four lines.
        write_fastq(&path, &[make_read("r1", b"", b"")]);

        assert_eq!(read_back(&path), "@r1\n\n+\n\n");
    }
}
146}