fqgrep_lib/
mod.rs

// Reject any `unsafe` code in this module tree (lint level `deny`).
#![deny(unsafe_code)]
// Silence the clippy lints that ask for `#[must_use]` on candidates, for `# Panics` /
// `# Errors` doc sections, and for item names that do not repeat the module name.
// NOTE(review): the rationale for opting out is not recorded here -- confirm with the authors.
#![allow(
    clippy::must_use_candidate,
    clippy::missing_panics_doc,
    clippy::missing_errors_doc,
    clippy::module_name_repetitions
)]
// Public submodules; their bodies live in separate files.
pub mod color;
pub mod matcher;
pub mod seq_io;
11use std::{borrow::Borrow, path::Path, sync::LazyLock};
12
/// The bases `A`, `C`, `G`, `T` and `N` as ASCII bytes.
pub const DNA_BASES: [u8; 5] = *b"ACGTN";
/// The 15 uppercase IUPAC nucleotide codes as ASCII bytes.
///
/// The order matters: this array is paired index-by-index with [`IUPAC_BASES_COMPLEMENT`].
pub const IUPAC_BASES: [u8; 15] = *b"AGCTYRWSKMDVHBN";
/// The complement of each code in [`IUPAC_BASES`], stored at the same index.
pub const IUPAC_BASES_COMPLEMENT: [u8; 15] = *b"TCGARYWSMKHBDVN";
16
17static COMPLEMENT: LazyLock<[u8; 256]> = LazyLock::new(|| {
18    let mut comp = [0; 256];
19    for (v, a) in comp.iter_mut().enumerate() {
20        *a = v as u8;
21    }
22    for (&a, &b) in IUPAC_BASES.iter().zip(IUPAC_BASES_COMPLEMENT.iter()) {
23        comp[a as usize] = b;
24        comp[a as usize + 32] = b + 32; // lowercase variants
25    }
26    comp
27});
28
29fn complement(a: u8) -> u8 {
30    COMPLEMENT[a as usize]
31}
32
33fn reverse_complement<C, T>(text: T) -> Vec<u8>
34where
35    C: Borrow<u8>,
36    T: IntoIterator<Item = C>,
37    T::IntoIter: DoubleEndedIterator,
38{
39    text.into_iter()
40        .rev()
41        .map(|a| complement(*a.borrow()))
42        .collect()
43}
44
/// Returns true if the final extension of `p` is exactly one of `extensions`.
///
/// Only the last extension is examined (`reads.fq.gz` is tested as `gz`), and the comparison
/// is an exact, case-sensitive string match. Paths without an extension, or whose extension
/// is not valid UTF-8, yield `false`. The path is never opened; only its text is inspected.
fn is_path_with_extension<P: AsRef<Path>>(p: &P, extensions: [&str; 2]) -> bool {
    p.as_ref()
        .extension()
        // A non-UTF-8 extension cannot equal any of the `&str` candidates.
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| extensions.contains(&ext))
}
56
/// The set of file extensions to treat as GZIPPED
const GZIP_EXTENSIONS: [&str; 2] = ["gz", "bgz"];

/// Returns true if the path ends with a recognized GZIP file extension
///
/// The decision is made from the path text alone, by passing `GZIP_EXTENSIONS` to
/// `is_path_with_extension`: the final extension must be exactly `gz` or `bgz`
/// (case-sensitive). The file itself is not opened.
pub fn is_gzip_path<P: AsRef<Path>>(p: &P) -> bool {
    is_path_with_extension(p, GZIP_EXTENSIONS)
}
64
/// The set of file extensions to treat as FASTQ
const FASTQ_EXTENSIONS: [&str; 2] = ["fastq", "fq"];

/// Returns true if the path ends with a recognized FASTQ file extension
///
/// The decision is made from the path text alone, by passing `FASTQ_EXTENSIONS` to
/// `is_path_with_extension`: the final extension must be exactly `fastq` or `fq`
/// (case-sensitive). Because only the final extension is compared, a path such as
/// `reads.fq.gz` is not reported as FASTQ by this function.
pub fn is_fastq_path<P: AsRef<Path>>(p: &P) -> bool {
    is_path_with_extension(p, FASTQ_EXTENSIONS)
}
72
// Tests
#[cfg(test)]
pub mod tests {
    use crate::*;
    use rstest::rstest;
    use std::path::PathBuf;
    use std::str;

    // ############################################################################################
    // Tests reverse_complement()
    // ############################################################################################

    // The output must be the input read back-to-front with every base complemented.
    #[rstest]
    #[case("", "")] // Empty input yields empty output
    #[case("ACGT", "ACGT")] // Reverse complement with even length string
    #[case("ACG", "CGT")] // Reverse complement with odd length string (tests for off by one error)
    #[case("AACC", "GGTT")] // Non-palindromic even length string
    #[case("aacc", "ggtt")] // Lowercase bases are complemented and stay lowercase
    #[case("AYRN", "NYRT")] // IUPAC ambiguity codes are complemented
    #[case("AC-G", "C-GT")] // Bytes that are not IUPAC codes pass through unchanged
    fn test_reverse_complement(#[case] seq: &str, #[case] expected: &str) {
        let result = reverse_complement(seq.as_bytes());
        let string_result = str::from_utf8(&result).unwrap();
        assert_eq!(&string_result, &expected);
    }

    // ############################################################################################
    // Tests is_gzip_path()
    // ############################################################################################

    // The predicate only inspects the path text, so no file or directory needs to exist.
    #[rstest]
    #[case("test_fastq.fq.gz", true)] // .fq.gz is valid gzip
    #[case("test_fastq.fq.bgz", true)] // .fq.bgz is valid gzip
    #[case("test_fastq.fq.tar", false)] // .fq.tar is invalid gzip
    #[case("test_fastq", false)] // no extension is invalid gzip
    #[case("test_fastq.gz.fq", false)] // only the final extension is considered
    fn test_is_gzip_path(#[case] file_name: &str, #[case] expected: bool) {
        let file_path = PathBuf::from(file_name);
        let result = is_gzip_path(&file_path);
        assert_eq!(result, expected);
    }

    // ############################################################################################
    // Tests is_fastq_path()
    // ############################################################################################

    // The predicate only inspects the path text, so no file or directory needs to exist.
    #[rstest]
    #[case("test_fastq.fq", true)] // .fq is valid fastq
    #[case("test_fastq.fastq", true)] // .fastq is valid fastq
    #[case("test_fastq.sam", false)] // .sam is invalid fastq
    #[case("test_fastq", false)] // no extension is invalid fastq
    #[case("test_fastq.fq.gz", false)] // only the final extension is considered
    fn test_is_fastq_path(#[case] file_name: &str, #[case] expected: bool) {
        let file_path = PathBuf::from(file_name);
        let result = is_fastq_path(&file_path);
        assert_eq!(result, expected);
    }
}