use regex::Regex;
use regex::bytes::Regex as BytesRegex;
pub use util::{ExtractionRegex, ValidationRegex};
/// Marker type carrying the regexes for a nucleotide sequence made up of
/// `A`, `C`, `G` and `T` in either case.
pub struct NucleotideRegex;
impl ValidationRegex<BytesRegex> for NucleotideRegex {
fn validate() -> &'static BytesRegex {
lazy_regex!(BytesRegex, r"(?-u)(?x)
\A
(?:
[ACGTacgt]+
)
\z
");
®EX
}
}
impl ExtractionRegex<BytesRegex> for NucleotideRegex {
fn extract() -> &'static BytesRegex {
lazy_regex!(BytesRegex, r"(?-u)(?x)
\A
# Group 1, Nucleotide Sequence
(
[ACGTacgt]+
)
\z
");
®EX
}
}
/// Marker type carrying the regexes for a line of sequence quality scores
/// (one or more ASCII printable bytes).
pub struct SequenceQualityRegex;
impl ValidationRegex<BytesRegex> for SequenceQualityRegex {
fn validate() -> &'static BytesRegex {
lazy_regex!(BytesRegex, r"(?-u)(?x)
\A
(?:
[[:print:]]+
)
\z
");
®EX
}
}
impl ExtractionRegex<BytesRegex> for SequenceQualityRegex {
fn extract() -> &'static BytesRegex {
lazy_regex!(BytesRegex, r"(?-u)(?x)
\A
# Group 1, Sequence Quality Scores
(
[[:print:]]+
)
\z
");
®EX
}
}
/// Marker type carrying the regexes for a FASTQ header line, i.e. a line that
/// starts with `@` or `+` followed by a sequence ID and a description.
pub struct FastqHeaderRegex;
// Capture-group indices matching the groups declared in the extraction
// pattern of `FastqHeaderRegex` (group 1 = sequence ID, group 2 = description).
impl FastqHeaderRegex {
/// Index of the capture group holding the sequence ID.
pub const SEQID_INDEX: usize = 1;
/// Index of the capture group holding the description.
pub const DESCRIPTION_INDEX: usize = 2;
}
impl ValidationRegex<Regex> for FastqHeaderRegex {
fn validate() -> &'static Regex {
lazy_regex!(Regex, r"(?x)(?m)
\A
[@+]
(?:
[^[:space:]]+
)
\s
(?:
.*?
)
(?:
\slength=[[:digit:]]+
)?
\z
");
®EX
}
}
impl ExtractionRegex<Regex> for FastqHeaderRegex {
fn extract() -> &'static Regex {
lazy_regex!(Regex, r"(?x)(?m)
\A
[@+] # The symbol for a header line.
# Group 1, Sequence ID.
(
[^[:space:]]+
)
\s
# Group 2, Description.
(
.*?
)
# Optional length after description.
(?:
\slength=[[:digit:]]+
)?
\z
");
®EX
}
}
// Unit tests for the regex marker types above.
//
// NOTE(review): `check_regex!` and `extract_regex!` are defined elsewhere in
// the crate. From their use here, `check_regex!(T, input, expected)` presumably
// asserts whether `T`'s regex matches `input`, and
// `extract_regex!(T, input, group, expected, accessor)` presumably asserts the
// content of capture group `group` -- confirm against the macro definitions.
#[cfg(test)]
mod tests {
use super::*;
// Nucleotide sequences: empty input, RNA-style sequences containing `U`, and
// arbitrary words are expected to be rejected; A/C/G/T sequences are accepted.
#[test]
fn nucleotide_regex() {
type T = NucleotideRegex;
check_regex!(T, b"", false);
check_regex!(T, b"AAGTAGGTCTCGTCTGTGTTTTCTACGAGCTTGTGTTCCAGCTGACCCACTCCCTGGGTGGGGGGACTGGGT", true);
check_regex!(T, b"CCAGCCTGGCCAACAGAGTGTTACCCCGTTTTTACTTATTTATTATTATTATTTTGAGACAGAGCATTGGTC", true);
check_regex!(T, b"ATAAAATCAGGGGTGTTGGAGATGGGATGCCTATTTCTGCACACCTTGGCCTCCCAAATTGCTGGGATTACA", true);
check_regex!(T, b"TTAAGAAATTTTTGCTCAAACCATGCCCTAAAGGGTTCTGTAATAAATAGGGCTGGGAAAACTGGCAAGCCA", true);
// Same sequences with `T` replaced by `U`: not part of the accepted alphabet.
check_regex!(T, b"AAGUAGGUCUCGUCUGUGUUUUCUACGAGCUUGUGUUCCAGCUGACCCACUCCCUGGGUGGGGGGACUGGGU", false);
check_regex!(T, b"CCAGCCUGGCCAACAGAGUGUUACCCCGUUUUUACUUAUUUAUUAUUAUUAUUUUGAGACAGAGCAUUGGUC", false);
check_regex!(T, b"AUAAAAUCAGGGGUGUUGGAGAUGGGAUGCCUAUUUCUGCACACCUUGGCCUCCCAAAUUGCUGGGAUUACA", false);
check_regex!(T, b"UUAAGAAAUUUUUGCUCAAACCAUGCCCUAAAGGGUUCUGUAAUAAAUAGGGCUGGGAAAACUGGCAAGCCA", false);
// Words containing letters outside A/C/G/T, in upper, lower and mixed case.
check_regex!(T, b"SAMPLER", false);
check_regex!(T, b"sampler", false);
check_regex!(T, b"sAmpLer", false);
}
// Quality-score lines: printable ASCII is accepted; empty input and a bare
// CR LF are expected to be rejected.
#[test]
fn sequence_quality_regex() {
type T = SequenceQualityRegex;
check_regex!(T, b"", false);
check_regex!(T, b";;;;;;;;;;;;;;;;;4;;;;3;393.1+4&&5&&;;;;;;;;;;;;;;;;;;;;;<9;<;;;;;464262", true);
check_regex!(T, b"-;;;8;;;;;;;,*;;';-4,44;,:&,1,4'./&19;;;;;;669;;99;;;;;-;3;2;0;+;7442&2/", true);
check_regex!(T, b"1;;;;;;,;;4;3;38;8%&,,;)*;1;;,)/%4+,;1;;);;;;;;;4;(;1;;;;24;;;;41-444//0", true);
check_regex!(T, b";;;;;;;;;;;;;;;;;;;;;;;;;;;;;9445552;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;446662", true);
check_regex!(T, b"\r\n", false);
}
// FASTQ headers: both the `@` and `+` prefixes are accepted, with or without
// the trailing `length=` field; the extraction checks use group 1 (sequence
// ID) and group 2 (description).
#[test]
fn fastq_header_regex() {
type T = FastqHeaderRegex;
check_regex!(T, "", false);
check_regex!(T, "@SRR390728.2 2 length=72", true);
check_regex!(T, "+SRR390728.2 2 length=72", true);
check_regex!(T, "@EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18:1", true);
check_regex!(T, "+EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18:1", true);
// The trailing ` length=72` must not end up in the description group.
extract_regex!(T, "@SRR390728.2 2 length=72", 1, "SRR390728.2", as_str);
extract_regex!(T, "@SRR390728.2 2 length=72", 2, "2", as_str);
extract_regex!(T, "@EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18:1", 1, "EAS139:136:FC706VJ:2:2104:15343:197393", as_str);
extract_regex!(T, "@EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18:1", 2, "1:N:18:1", as_str);
}
}