Skip to main content

read_structure/
lib.rs

1//! Read structures is a library for working with strings that describe how the bases in a sequencing run
2//! should be allocated into logical reads.
3//!
4//! Each read structure is made up of one or more read segments, each of which has a length and a segment type.
5//!
6//! For more details see [here](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures)
7//!
8//! # Example
9//!
10//! Parsing a complex read structure.
11//!
12//! ```rust
13//! use std::str::FromStr;
14//! use read_structure::ReadStructure;
15//!
16//! let rs = ReadStructure::from_str("76T8B8B76T").unwrap();
17//! let templates: String = rs.templates().map(|s| s.to_string()).collect();
18//! assert_eq!(templates, "76T76T");
19//! ```
20//!
21//! Extracting segments from an actual read based on the read structure:
22//!
23//! ```rust
24//! use std::str::FromStr;
25//! use read_structure::{ReadStructure, SegmentType, SkipHandling};
26//!
27//! let bases = b"\
28//!     AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGCCCCCCCCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT";
29//! let quals = &[b'I'; 168][..];
30//! let rs = ReadStructure::from_str("76T8B8B76T").unwrap();
31//!
32//! let templates: Vec<&[u8]> = rs.extract(bases, quals, SkipHandling::Exclude)
33//!     .unwrap()
34//!     .filter(|(seg, _, _)| seg.kind == SegmentType::Template)
35//!     .map(|(_, bases, _)| bases)
36//!     .collect();
37//!
38//! assert_eq!(templates, vec![
39//!     &b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"[..],
40//!     &b"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"[..],
41//! ]);
42//! ```
43
44#![allow(unused, clippy::must_use_candidate)]
45#![allow(dead_code)]
46
47mod read_segment;
48mod read_structure;
49mod segment_type;
50
51pub use crate::read_structure::*;
52pub use read_segment::*;
53pub use segment_type::*;
54use thiserror::Error;
55
56#[derive(Debug, Error)]
57pub enum ReadStructureError {
58    #[error("Example")]
59    Example,
60
61    #[error("Invalid read structure: {0}")]
62    InvalidReadStructure(String),
63
64    #[error("Mismatching bases and quals lengths: {bases_len}, {quals_len}")]
65    MismatchingBasesAndQualsLen { bases_len: usize, quals_len: usize },
66
67    #[error("Read structure missing length information: {}[{}]{}", .0.prefix, .0.error, .0.suffix)]
68    ReadStructureMissingLengthInformation(ErrorMessageParts),
69
70    #[error("Read structure missing operator: {}[{}]{}", .0.prefix, .0.error, .0.suffix)]
71    ReadStructureMissingOperator(ErrorMessageParts),
72
73    #[error("Read structure had unknown type: {}[{}]{}", .0.prefix, .0.error, .0.suffix)]
74    ReadStructureHadUnknownType(ErrorMessageParts),
75
76    #[error("Read structure contains zero elements")]
77    ReadStructureContainsZeroElements,
78
79    #[error("Read structure contains more than one indefinite-length (`+`) segment: {0}")]
80    ReadStructureMultipleIndefiniteLengthSegments(ReadSegment),
81
82    /// The read is too short to accommodate every fixed-length segment in the read
83    /// structure. `required` is the sum of all fixed segment lengths, plus 1 if the
84    /// structure also has an indefinite (`+`) segment (which must be at least one base).
85    #[error("Read of length {read_len} is shorter than required minimum {required}")]
86    ReadTooShort { read_len: usize, required: usize },
87
88    /// A fixed-length read structure was handed a read of the wrong length. Fixed
89    /// structures require exact-length reads; anything longer is almost always a
90    /// caller bug (wrong structure for this data, or a stray adapter still attached).
91    #[error("Read of length {read_len} does not match fixed structure length {expected}")]
92    ReadTooLong { read_len: usize, expected: usize },
93
94    #[error("ReadSegment too short: {0}")]
95    ReadSegmentTooShort(String),
96
97    #[error("ReadSegment str contained more than one segment: {0}")]
98    ReadSegmentMultipleSegments(String),
99
100    #[error("ReadSegment must have length > 0 or `+`: {}[{}]{}", .0.prefix, .0.error, .0.suffix)]
101    ReadSegmentLengthZero(ErrorMessageParts),
102
103    #[error("Invalid SegmentType: {0}")]
104    ReadSegmentTypeInvalid(char),
105
106    #[error("Invalid SegmentType: {0}")]
107    ReadSegmentTypeStringInvalid(String),
108}
109
/// Helper struct for isolating the erroneous portion of a string.
///
/// The input is split into three consecutive pieces: the text before the
/// erroneous span, the span itself, and the text after it.
#[derive(Debug)]
pub struct ErrorMessageParts {
    /// Characters that come before the erroneous span.
    prefix: String,
    /// The erroneous span itself.
    error: String,
    /// Characters that come after the erroneous span.
    suffix: String,
}

impl ErrorMessageParts {
    /// Splits `chars` into the prefix `chars[..start]`, the erroneous span
    /// `chars[start..end]`, and the suffix `chars[end..]`.
    ///
    /// Out-of-range indices are clamped rather than trusted: `end` is limited to
    /// `chars.len()` and `start` is limited to `end`. This value is only built
    /// while reporting an error, so constructing it must never panic itself.
    fn new(chars: &[char], start: usize, end: usize) -> Self {
        // Clamp first so that both `split_at` calls below are always in bounds.
        let end = end.min(chars.len());
        let start = start.min(end);

        let (head, suffix) = chars.split_at(end);
        let (prefix, error) = head.split_at(start);

        Self {
            prefix: prefix.iter().collect(),
            error: error.iter().collect(),
            suffix: suffix.iter().collect(),
        }
    }
}