Skip to main content

fgumi_lib/
fastq.rs

1//! FASTQ file parsing and read structure handling.
2//!
3//! This module provides functionality for reading and parsing FASTQ files with support for
4//! complex read structures. It can handle reads that contain multiple segments of different
5//! types (template, UMI, sample barcodes, cell barcodes) and provides utilities for
6//! extracting and manipulating these segments.
7//!
8//! # Read Structures
9//!
10//! Read structures describe the layout of bases within a FASTQ read. For example, a read
11//! might be structured as "8M143T" meaning 8 bases of molecular barcode (UMI) followed by
12//! 143 bases of template sequence.
13//!
14//! # Example
15//!
16//! ```rust,ignore
17//! use read_structure::ReadStructure;
18//! use std::fs::File;
19//! use std::io::BufReader;
20//! use seq_io::fastq::Reader;
21//!
22//! let rs = ReadStructure::from_str("8M143T").unwrap();
23//! let file = File::open("reads.fq").unwrap();
24//! let reader = Reader::new(BufReader::new(file));
25//! let mut iter = ReadSetIterator::new(rs, reader, vec![]);
26//!
27//! for read_set in iter {
28//!     // Process each read set...
29//! }
30//! ```
31
32use anyhow::{Result, anyhow};
33use read_structure::ReadStructure;
34use read_structure::SegmentType;
35use seq_io::fastq::Reader as FastqReader;
36use seq_io::fastq::Record;
37use std::fmt::Display;
38use std::io::BufRead;
39use std::iter::Filter;
40use std::slice::Iter;
41use std::str::FromStr;
42
43/// Type alias for segment type iter functions, which iterate over the segments of a `FastqSet`
44/// filtering for a specific type.
45type SegmentIter<'a> = Filter<Iter<'a, FastqSegment>, fn(&&FastqSegment) -> bool>;
46
47/// A segment of a FASTQ record representing bases and qualities of a specific type.
48///
49/// FASTQ records can be divided into segments based on their biological function
50/// (template, UMI, barcode, etc.). Each segment contains the bases, quality scores,
51/// and the segment type.
52#[derive(PartialEq, Eq, Debug, Clone)]
53pub struct FastqSegment {
54    /// The nucleotide bases of this segment (A, C, G, T, N)
55    pub seq: Vec<u8>,
56    /// Phred-scaled quality scores (typically ASCII-encoded as Phred+33)
57    pub quals: Vec<u8>,
58    /// The biological/functional type of this segment
59    pub segment_type: SegmentType,
60}
61
62////////////////////////////////////////////////////////////////////////////////
63// FastqSet and its impls
64////////////////////////////////////////////////////////////////////////////////
65
66/// Reasons why a read might be skipped during processing.
67///
68/// When processing FASTQ reads with read structures, some reads may not meet
69/// the minimum requirements and need to be skipped. This enum categorizes the
70/// reasons for skipping.
71#[derive(Eq, Hash, PartialEq, Debug, Clone, Copy)]
72pub enum SkipReason {
73    /// The read had too few bases for the required segments in the read structure
74    TooFewBases,
75}
76
77impl Display for SkipReason {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        match self {
80            SkipReason::TooFewBases => write!(f, "Too few bases"),
81        }
82    }
83}
84
85impl FromStr for SkipReason {
86    type Err = anyhow::Error;
87    fn from_str(string: &str) -> Result<Self, Self::Err> {
88        match string {
89            "too few bases" | "too-few-bases" | "toofewbases" => Ok(SkipReason::TooFewBases),
90            _ => Err(anyhow!(
91                "Invalid skip reason: '{string}' (valid values: 'too-few-bases', 'toofewbases')"
92            )),
93        }
94    }
95}
96
97/// A FASTQ record parsed into its component segments according to a read structure.
98///
99/// When reading FASTQ files with complex structures (e.g., containing UMIs, barcodes,
100/// and template sequences), this structure holds the parsed segments along with the
101/// original header. Each segment is classified by type (template, molecular barcode,
102/// sample barcode, etc.).
103///
104/// # Fields
105///
106/// * `header` - The original FASTQ header line (without the '@' prefix)
107/// * `segments` - Vector of parsed segments in order
108/// * `skip_reason` - If Some, indicates why this read should be skipped during processing
109#[derive(PartialEq, Debug, Clone)]
110pub struct FastqSet {
111    /// The FASTQ header line (without '@' prefix)
112    pub header: Vec<u8>,
113    /// Ordered list of parsed segments from this read
114    pub segments: Vec<FastqSegment>,
115    /// Reason for skipping this read, or None if the read should be processed
116    pub skip_reason: Option<SkipReason>,
117}
118
119impl FastqSet {
120    /// Returns an iterator over template segments.
121    ///
122    /// Template segments contain the actual biological sequence to be analyzed
123    /// (as opposed to barcodes or UMIs).
124    ///
125    /// # Returns
126    ///
127    /// Iterator over references to template segments in this read set
128    pub fn template_segments(&self) -> SegmentIter<'_> {
129        self.segments.iter().filter(|s| s.segment_type == SegmentType::Template)
130    }
131
132    /// Returns an iterator over sample barcode segments.
133    ///
134    /// Sample barcodes identify which sample/library a read belongs to in
135    /// multiplexed sequencing runs.
136    ///
137    /// # Returns
138    ///
139    /// Iterator over references to sample barcode segments
140    pub fn sample_barcode_segments(&self) -> SegmentIter<'_> {
141        self.segments.iter().filter(|s| s.segment_type == SegmentType::SampleBarcode)
142    }
143
144    /// Returns an iterator over molecular barcode (UMI) segments.
145    ///
146    /// Molecular barcodes (UMIs) uniquely tag individual molecules for
147    /// duplicate detection and quantification.
148    ///
149    /// # Returns
150    ///
151    /// Iterator over references to molecular barcode segments
152    pub fn molecular_barcode_segments(&self) -> SegmentIter<'_> {
153        self.segments.iter().filter(|s| s.segment_type == SegmentType::MolecularBarcode)
154    }
155
156    /// Returns an iterator over cell barcode segments.
157    ///
158    /// Cell barcodes identify which cell a read originated from in
159    /// single-cell sequencing experiments.
160    ///
161    /// # Returns
162    ///
163    /// Iterator over references to cell barcode segments
164    pub fn cell_barcode_segments(&self) -> SegmentIter<'_> {
165        self.segments.iter().filter(|s| s.segment_type == SegmentType::CellularBarcode)
166    }
167
168    /// Combines multiple `FastqSet` instances into a single set.
169    ///
170    /// Merges the segments from multiple read sets, preserving the header from
171    /// the first set. Useful for combining segments from multiple FASTQ files
172    /// that represent different parts of the same read (e.g., R1, R2, I1, I2).
173    ///
174    /// # Arguments
175    ///
176    /// * `readsets` - Vector of read sets to combine
177    ///
178    /// # Returns
179    ///
180    /// A new `FastqSet` with all segments combined
181    ///
182    /// # Panics
183    ///
184    /// Panics if the input vector is empty or contains no segments
185    #[must_use]
186    pub fn combine_readsets(readsets: Vec<Self>) -> Self {
187        let total_segments: usize = readsets.iter().map(|s| s.segments.len()).sum();
188        assert!(total_segments > 0, "Cannot call combine readsets on an empty vec!");
189
190        let mut readset_iter = readsets.into_iter();
191        let mut first = readset_iter
192            .next()
193            .expect("readset_iter is non-empty because total_segments > 0 was checked");
194        first.segments.reserve_exact(total_segments - first.segments.len());
195
196        for next_readset in readset_iter {
197            first.segments.extend(next_readset.segments);
198        }
199
200        first
201    }
202
203    /// Creates a `FastqSet` from raw FASTQ record data and a read structure.
204    ///
205    /// This method applies the read structure to segment the sequence and quality
206    /// data, producing a `FastqSet` with appropriately typed segments.
207    ///
208    /// # Arguments
209    ///
210    /// * `header` - The FASTQ header/name (without '@' prefix)
211    /// * `sequence` - The full sequence bases
212    /// * `quality` - The full quality scores
213    /// * `read_structure` - The read structure describing segment layout
214    /// * `skip_reasons` - List of reasons to skip reads gracefully instead of failing
215    ///
216    /// # Returns
217    ///
218    /// A `FastqSet` with segments parsed according to the read structure.
219    /// If the read doesn't have enough bases and `TooFewBases` is in `skip_reasons`,
220    /// returns a `FastqSet` with `skip_reason` set.
221    ///
222    /// # Panics
223    ///
224    /// Panics if the read has too few bases and `TooFewBases` is not in `skip_reasons`.
225    #[must_use]
226    pub fn from_record_with_structure(
227        header: &[u8],
228        sequence: &[u8],
229        quality: &[u8],
230        read_structure: &ReadStructure,
231        skip_reasons: &[SkipReason],
232    ) -> Self {
233        let mut segments = Vec::with_capacity(read_structure.number_of_segments());
234
235        // Check minimum length required by the read structure
236        let min_len: usize = read_structure.iter().map(|s| s.length().unwrap_or(0)).sum();
237        if sequence.len() < min_len {
238            if skip_reasons.contains(&SkipReason::TooFewBases) {
239                return Self {
240                    header: header.to_vec(),
241                    segments: vec![],
242                    skip_reason: Some(SkipReason::TooFewBases),
243                };
244            }
245            let read_name_str = String::from_utf8_lossy(header);
246            panic!(
247                "Read {} had too few bases to demux {} vs. {} needed in read structure {}.",
248                read_name_str,
249                sequence.len(),
250                min_len,
251                read_structure
252            );
253        }
254
255        // Extract segments according to the read structure
256        for (segment_index, read_segment) in read_structure.iter().enumerate() {
257            let (seq, quals) =
258                read_segment.extract_bases_and_quals(sequence, quality).unwrap_or_else(|e| {
259                    let read_name_str = String::from_utf8_lossy(header);
260                    panic!(
261                        "Error extracting bases (len: {}) or quals (len: {}) for the {}th \
262                         segment ({}) in read structure ({}) from record {}; {}",
263                        sequence.len(),
264                        quality.len(),
265                        segment_index,
266                        read_segment,
267                        read_structure,
268                        read_name_str,
269                        e
270                    );
271                });
272
273            segments.push(FastqSegment {
274                seq: seq.to_vec(),
275                quals: quals.to_vec(),
276                segment_type: read_segment.kind,
277            });
278        }
279
280        Self { header: header.to_vec(), segments, skip_reason: None }
281    }
282}
283
284////////////////////////////////////////////////////////////////////////////////
285// ReadSetIterator and its impls
286////////////////////////////////////////////////////////////////////////////////
287
288/// Iterator for parsing FASTQ files according to a read structure.
289///
290/// This iterator reads FASTQ records and parses them into `FastqSet` objects based on
291/// the provided read structure. It handles validation and can optionally skip reads that
292/// don't meet requirements (e.g., too few bases) rather than panicking.
293///
294/// # Example
295///
296/// ```rust,ignore
297/// let read_structure = ReadStructure::from_str("8M143T")?;
298/// let file = File::open("reads.fq")?;
299/// let reader = Reader::new(BufReader::new(file));
300/// let mut iterator = ReadSetIterator::new(read_structure, reader, vec![]);
301///
302/// for read_set in iterator {
303///     // Process each parsed read set
304/// }
305/// ```
306pub struct ReadSetIterator {
307    /// Read structure describing the layout of bases in each read
308    read_structure: ReadStructure,
309    /// FASTQ file reader
310    source: FastqReader<Box<dyn BufRead + Send>>,
311    /// Reasons to skip reads instead of panicking (e.g., too few bases)
312    skip_reasons: Vec<SkipReason>,
313}
314
315impl Iterator for ReadSetIterator {
316    type Item = FastqSet;
317
318    fn next(&mut self) -> Option<Self::Item> {
319        let rec = self.source.next()?;
320        let record = match rec {
321            Ok(record) => record,
322            Err(e) => {
323                panic!("Error parsing FASTQ record: {e}");
324            }
325        };
326        Some(FastqSet::from_record_with_structure(
327            record.head(),
328            record.seq(),
329            record.qual(),
330            &self.read_structure,
331            &self.skip_reasons,
332        ))
333    }
334}
335
336impl ReadSetIterator {
337    /// Creates a new iterator for parsing FASTQ records with a read structure.
338    ///
339    /// # Arguments
340    ///
341    /// * `read_structure` - Read structure describing the segment layout
342    /// * `source` - FASTQ reader for input
343    /// * `skip_reasons` - List of reasons to skip reads gracefully instead of panicking.
344    ///   If a read fails for a reason not in this list, the iterator will panic.
345    ///
346    /// # Returns
347    ///
348    /// A new `ReadSetIterator` instance
349    ///
350    /// # Example
351    ///
352    /// ```rust,ignore
353    /// // Skip reads that are too short instead of panicking
354    /// let iterator = ReadSetIterator::new(
355    ///     read_structure,
356    ///     reader,
357    ///     vec![SkipReason::TooFewBases]
358    /// );
359    /// ```
360    #[must_use]
361    pub fn new(
362        read_structure: ReadStructure,
363        source: FastqReader<Box<dyn BufRead + Send>>,
364        skip_reasons: Vec<SkipReason>,
365    ) -> Self {
366        Self { read_structure, source, skip_reasons }
367    }
368}
369
370#[cfg(test)]
371mod tests {
372    use super::*;
373
374    fn read_set(segments: Vec<FastqSegment>) -> FastqSet {
375        FastqSet { header: "NOT_IMPORTANT".as_bytes().to_owned(), segments, skip_reason: None }
376    }
377
378    fn seg(bases: &[u8], segment_type: SegmentType) -> FastqSegment {
379        let quals = vec![b'#'; bases.len()];
380        FastqSegment { seq: bases.to_vec(), quals, segment_type }
381    }
382
383    #[test]
384    fn test_template_segments() {
385        let segments = vec![
386            seg("AGCT".as_bytes(), SegmentType::SampleBarcode),
387            seg("GATA".as_bytes(), SegmentType::Template),
388            seg("CAC".as_bytes(), SegmentType::Template),
389            seg("GACCCC".as_bytes(), SegmentType::MolecularBarcode),
390        ];
391        let expected = vec![
392            seg("GATA".as_bytes(), SegmentType::Template),
393            seg("CAC".as_bytes(), SegmentType::Template),
394        ];
395
396        let read_set = read_set(segments);
397
398        assert_eq!(expected, read_set.template_segments().cloned().collect::<Vec<_>>());
399    }
400    #[test]
401    fn test_sample_barcode_segments() {
402        let segments = vec![
403            seg("AGCT".as_bytes(), SegmentType::Template),
404            seg("GATA".as_bytes(), SegmentType::SampleBarcode),
405            seg("CAC".as_bytes(), SegmentType::SampleBarcode),
406            seg("GACCCC".as_bytes(), SegmentType::MolecularBarcode),
407        ];
408        let expected = vec![
409            seg("GATA".as_bytes(), SegmentType::SampleBarcode),
410            seg("CAC".as_bytes(), SegmentType::SampleBarcode),
411        ];
412
413        let read_set = read_set(segments);
414
415        assert_eq!(expected, read_set.sample_barcode_segments().cloned().collect::<Vec<_>>());
416    }
417    #[test]
418    fn test_molecular_barcode_segments() {
419        let segments = vec![
420            seg("AGCT".as_bytes(), SegmentType::Template),
421            seg("GATA".as_bytes(), SegmentType::MolecularBarcode),
422            seg("CAC".as_bytes(), SegmentType::MolecularBarcode),
423            seg("GACCCC".as_bytes(), SegmentType::SampleBarcode),
424        ];
425        let expected = vec![
426            seg("GATA".as_bytes(), SegmentType::MolecularBarcode),
427            seg("CAC".as_bytes(), SegmentType::MolecularBarcode),
428        ];
429
430        let read_set = read_set(segments);
431
432        assert_eq!(expected, read_set.molecular_barcode_segments().cloned().collect::<Vec<_>>());
433    }
434
435    #[test]
436    fn test_combine_readsets() {
437        let segments1 = vec![
438            seg("A".as_bytes(), SegmentType::Template),
439            seg("G".as_bytes(), SegmentType::Template),
440            seg("C".as_bytes(), SegmentType::MolecularBarcode),
441            seg("T".as_bytes(), SegmentType::SampleBarcode),
442        ];
443        let read_set1 = read_set(segments1.clone());
444        let segments2 = vec![
445            seg("AA".as_bytes(), SegmentType::Template),
446            seg("AG".as_bytes(), SegmentType::Template),
447            seg("AC".as_bytes(), SegmentType::MolecularBarcode),
448            seg("AT".as_bytes(), SegmentType::SampleBarcode),
449        ];
450        let read_set2 = read_set(segments2.clone());
451        let segments3 = vec![
452            seg("AAA".as_bytes(), SegmentType::Template),
453            seg("AAG".as_bytes(), SegmentType::Template),
454            seg("AAC".as_bytes(), SegmentType::MolecularBarcode),
455            seg("AAT".as_bytes(), SegmentType::SampleBarcode),
456        ];
457        let read_set3 = read_set(segments3.clone());
458
459        let mut expected_segments = Vec::new();
460        expected_segments.extend(segments1);
461        expected_segments.extend(segments2);
462        expected_segments.extend(segments3);
463        let expected = read_set(expected_segments);
464
465        let result = FastqSet::combine_readsets(vec![read_set1, read_set2, read_set3]);
466
467        assert_eq!(result, expected);
468    }
469
470    #[test]
471    #[should_panic(expected = "Cannot call combine readsets on an empty vec!")]
472    fn test_combine_readsets_fails_on_empty_vector() {
473        let _result = FastqSet::combine_readsets(Vec::new());
474    }
475
476    // =====================================================================
477    // Cell barcode segment tests
478    // =====================================================================
479
480    #[test]
481    fn test_cell_barcode_segments() {
482        let segments = vec![
483            seg("AGCT".as_bytes(), SegmentType::Template),
484            seg("GATA".as_bytes(), SegmentType::CellularBarcode),
485            seg("CAC".as_bytes(), SegmentType::CellularBarcode),
486            seg("GACCCC".as_bytes(), SegmentType::MolecularBarcode),
487        ];
488        let expected = vec![
489            seg("GATA".as_bytes(), SegmentType::CellularBarcode),
490            seg("CAC".as_bytes(), SegmentType::CellularBarcode),
491        ];
492
493        let read_set = read_set(segments);
494
495        assert_eq!(expected, read_set.cell_barcode_segments().cloned().collect::<Vec<_>>());
496    }
497
498    #[test]
499    fn test_cell_barcode_segments_empty() {
500        let segments = vec![
501            seg("AGCT".as_bytes(), SegmentType::Template),
502            seg("GATA".as_bytes(), SegmentType::MolecularBarcode),
503        ];
504
505        let read_set = read_set(segments);
506
507        assert_eq!(0, read_set.cell_barcode_segments().count());
508    }
509
510    // =====================================================================
511    // SkipReason Display and FromStr tests
512    // =====================================================================
513
514    #[test]
515    fn test_skip_reason_display() {
516        assert_eq!(SkipReason::TooFewBases.to_string(), "Too few bases");
517    }
518
519    #[test]
520    fn test_skip_reason_from_str_valid() {
521        // Test various valid forms
522        assert_eq!(SkipReason::from_str("too few bases").unwrap(), SkipReason::TooFewBases);
523        assert_eq!(SkipReason::from_str("too-few-bases").unwrap(), SkipReason::TooFewBases);
524        assert_eq!(SkipReason::from_str("toofewbases").unwrap(), SkipReason::TooFewBases);
525    }
526
527    #[test]
528    fn test_skip_reason_from_str_invalid() {
529        let result = SkipReason::from_str("invalid-reason");
530        assert!(result.is_err());
531        let err = result.unwrap_err();
532        assert!(err.to_string().contains("Invalid skip reason"));
533        assert!(err.to_string().contains("invalid-reason"));
534    }
535
536    // =====================================================================
537    // ReadSetIterator tests
538    // =====================================================================
539
540    #[test]
541    fn test_read_set_iterator_basic() {
542        use std::io::Cursor;
543
544        // Create a simple FASTQ record
545        let fastq_data = b"@read1\nACGTACGT\n+\nIIIIIIII\n";
546        let cursor = Cursor::new(fastq_data.to_vec());
547        let reader: FastqReader<Box<dyn BufRead + Send>> = FastqReader::new(Box::new(cursor));
548
549        // Simple read structure: 4M (molecular barcode) + 4T (template)
550        let read_structure = ReadStructure::from_str("4M4T").unwrap();
551        let mut iterator = ReadSetIterator::new(read_structure, reader, vec![]);
552
553        let result = iterator.next();
554        assert!(result.is_some());
555
556        let fastq_set = result.unwrap();
557        assert_eq!(fastq_set.header, b"read1");
558        assert_eq!(fastq_set.segments.len(), 2);
559        assert!(fastq_set.skip_reason.is_none());
560
561        // First segment should be molecular barcode
562        assert_eq!(fastq_set.segments[0].seq, b"ACGT");
563        assert_eq!(fastq_set.segments[0].segment_type, SegmentType::MolecularBarcode);
564
565        // Second segment should be template
566        assert_eq!(fastq_set.segments[1].seq, b"ACGT");
567        assert_eq!(fastq_set.segments[1].segment_type, SegmentType::Template);
568
569        // No more records
570        assert!(iterator.next().is_none());
571    }
572
573    #[test]
574    fn test_read_set_iterator_skip_too_few_bases() {
575        use std::io::Cursor;
576
577        // Create a FASTQ record that's too short for the read structure
578        let fastq_data = b"@read1\nACGT\n+\nIIII\n";
579        let cursor = Cursor::new(fastq_data.to_vec());
580        let reader: FastqReader<Box<dyn BufRead + Send>> = FastqReader::new(Box::new(cursor));
581
582        // Read structure requires 10 bases total
583        let read_structure = ReadStructure::from_str("4M6T").unwrap();
584        let mut iterator =
585            ReadSetIterator::new(read_structure, reader, vec![SkipReason::TooFewBases]);
586
587        let result = iterator.next();
588        assert!(result.is_some());
589
590        let fastq_set = result.unwrap();
591        assert_eq!(fastq_set.header, b"read1");
592        assert!(fastq_set.segments.is_empty());
593        assert_eq!(fastq_set.skip_reason, Some(SkipReason::TooFewBases));
594    }
595
596    #[test]
597    #[should_panic(expected = "too few bases")]
598    fn test_read_set_iterator_panic_on_too_few_bases_without_skip() {
599        use std::io::Cursor;
600
601        // Create a FASTQ record that's too short
602        let fastq_data = b"@read1\nACGT\n+\nIIII\n";
603        let cursor = Cursor::new(fastq_data.to_vec());
604        let reader: FastqReader<Box<dyn BufRead + Send>> = FastqReader::new(Box::new(cursor));
605
606        // Read structure requires 10 bases total
607        let read_structure = ReadStructure::from_str("4M6T").unwrap();
608        let mut iterator = ReadSetIterator::new(read_structure, reader, vec![]);
609
610        // This should panic because TooFewBases is not in skip_reasons
611        let _ = iterator.next();
612    }
613
614    #[test]
615    fn test_read_set_iterator_multiple_records() {
616        use std::io::Cursor;
617
618        let fastq_data = b"@read1\nACGTAAAA\n+\nIIIIIIII\n@read2\nTGCATTTT\n+\nIIIIIIII\n";
619        let cursor = Cursor::new(fastq_data.to_vec());
620        let reader: FastqReader<Box<dyn BufRead + Send>> = FastqReader::new(Box::new(cursor));
621
622        let read_structure = ReadStructure::from_str("4M4T").unwrap();
623        let mut iterator = ReadSetIterator::new(read_structure, reader, vec![]);
624
625        // First record
626        let first = iterator.next().unwrap();
627        assert_eq!(first.header, b"read1");
628        assert_eq!(first.segments[0].seq, b"ACGT");
629        assert_eq!(first.segments[1].seq, b"AAAA");
630
631        // Second record
632        let second = iterator.next().unwrap();
633        assert_eq!(second.header, b"read2");
634        assert_eq!(second.segments[0].seq, b"TGCA");
635        assert_eq!(second.segments[1].seq, b"TTTT");
636
637        // No more records
638        assert!(iterator.next().is_none());
639    }
640
641    #[test]
642    fn test_read_set_iterator_variable_length_segment() {
643        use std::io::Cursor;
644
645        // Test with variable-length template segment (last segment gets remaining bases)
646        let fastq_data = b"@read1\nACGTTTTTTTTT\n+\nIIIIIIIIIIII\n";
647        let cursor = Cursor::new(fastq_data.to_vec());
648        let reader: FastqReader<Box<dyn BufRead + Send>> = FastqReader::new(Box::new(cursor));
649
650        // 4M + variable T (remaining bases go to template)
651        let read_structure = ReadStructure::from_str("4M+T").unwrap();
652        let mut iterator = ReadSetIterator::new(read_structure, reader, vec![]);
653
654        let result = iterator.next().unwrap();
655        assert_eq!(result.segments.len(), 2);
656
657        // Fixed molecular barcode segment
658        assert_eq!(result.segments[0].seq, b"ACGT");
659        assert_eq!(result.segments[0].segment_type, SegmentType::MolecularBarcode);
660
661        // Variable template gets remaining 8 bases
662        assert_eq!(result.segments[1].seq, b"TTTTTTTT");
663        assert_eq!(result.segments[1].segment_type, SegmentType::Template);
664    }
665
666    #[test]
667    fn test_fastq_set_with_skip_reason_only() {
668        // Test creating a FastqSet with skip_reason and empty segments
669        let fastq_set = FastqSet {
670            header: b"skipped_read".to_vec(),
671            segments: vec![],
672            skip_reason: Some(SkipReason::TooFewBases),
673        };
674
675        assert_eq!(fastq_set.skip_reason, Some(SkipReason::TooFewBases));
676        assert!(fastq_set.segments.is_empty());
677        assert_eq!(fastq_set.template_segments().count(), 0);
678        assert_eq!(fastq_set.molecular_barcode_segments().count(), 0);
679        assert_eq!(fastq_set.sample_barcode_segments().count(), 0);
680        assert_eq!(fastq_set.cell_barcode_segments().count(), 0);
681    }
682}