rnapkin/
utils.rs

1use std::fs::File;
2use std::io;
3use std::io::{BufRead, BufReader, Lines};
4use std::path::Path;
5
6use anyhow::{bail, Context, Result};
7use atty::{self, Stream};
8
9fn read_lines<P>(filename: P) -> Result<Lines<BufReader<File>>>
10where
11    P: AsRef<Path>,
12{
13    let file = File::open(filename)?;
14    Ok(BufReader::new(file).lines())
15}
16
/// The pieces of an RNA description gathered from user input.
///
/// Each field is `None` when the input contained nothing for it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedInput {
    /// Concatenation of all input lines recognised as sequence lines.
    pub sequence: Option<String>,
    /// Concatenation of all input lines recognised as secondary structure
    /// (lines starting with `.`, `(` or `)`).
    pub secondary_structure: Option<String>,
    /// Name taken from a `>` header line, with spaces replaced by `_`.
    pub rna_name: Option<String>,
    /// Concatenation of all input lines starting with a digit.
    pub highlight: Option<String>,
}
24
/// Converts an empty string into `None`; any other string is returned
/// unchanged inside `Some`.
fn empty_then_none(s: String) -> Option<String> {
    Some(s).filter(|text| !text.is_empty())
}
32
33impl ParsedInput {
34    /// reads file and passes it to Self::parse
35    pub fn from_file(input_file: &str) -> Result<Self> {
36        let mut lines = read_lines(input_file)
37            .with_context(|| format!("could not read file: {input_file}"))?
38            .map(|x| x.expect("invalid utf8?"));
39        Self::parse(&mut lines)
40    }
41
42    /// reads stdin and passes it to Self::parse
43    pub fn from_pipe() -> Result<Self> {
44        if atty::is(Stream::Stdin) {
45            bail!("No input provided! nothing to do :c")
46        }
47
48        let stdin = io::stdin();
49        let mut lines = stdin.lock().lines().map(|x| x.expect("invalid utf8?"));
50        Self::parse(&mut lines)
51    }
52
53    /// parses user provided input reads lines and checks the first byte:
54    /// interprets A-Ua-u as nucleotides
55    /// interprets .() as secondary structure
56    /// interprets > as name
57    /// ignores everything else
58    pub fn parse<L>(lines: &mut L) -> Result<Self>
59    where
60        L: Iterator<Item = String>,
61    {
62        let mut sequence = String::with_capacity(300);
63        let mut secondary_structure = String::with_capacity(300);
64        let mut highlight = String::with_capacity(300);
65        let mut rna_name: Option<String> = None;
66
67        for line in lines {
68            let trimmed = line.trim();
69            if trimmed.is_empty() {
70                continue;
71            }
72            match &trimmed[0..1].as_bytes()[0] {
73                0x41..=0x55 | 0x61..=0x75 => sequence.push_str(trimmed), // [A-Ua-u] can catch some non nt but then the input is doomed anyway
74                0x2e | 0x28 | 0x29 => secondary_structure.push_str(trimmed), // .()
75                0x30..=0x39 => highlight.push_str(trimmed),              // 0-9
76                0x3e => rna_name = Some(line[1..].trim().replace(' ', "_")), // >
77                _ => continue,
78            }
79        }
80
81        Ok(ParsedInput {
82            sequence: empty_then_none(sequence),
83            secondary_structure: empty_then_none(secondary_structure),
84            highlight: empty_then_none(highlight),
85            rna_name,
86        })
87    }
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Named input with the whole sequence block followed by the whole
    /// structure block, each wrapped over several indented lines.
    const TENA: &str = r#">TPP_riboswitch
        GCAGAACAATTCAATATGTATTCGTTTAACCACTAGGGGTGTCCTTCATAAGGGCTGAGA
        TAAAAGTGTGACTTTTAGACCCTCATAACTTGAACAGGTTCAGACCTGCGTAGGGAAGTG
        GAGCGGTATTTGTGTTATTTTACTATGCCAATTCCAAACCACTTTTCCTTGCGGGAAAGT
        GGTTTTTTTA

        .........(((..((((((...((((((((.....((((((((((...)))))).....
        (((((((...))))))).))))(((.....)))...)))).)))).))))))..)))..(
        (((.(((((..(((......))).)))))..))))(((((((((((((....))))))))
        )))))....."#;
    /// The same molecule with sequence and structure lines interleaved.
    const TENA_SHUFFLED: &str = r#">TPP_riboswitch
        GCAGAACAATTCAATATGTATTCGTTTAACCACTAGGGGTGTCCTTCATAAGGGCTGAGA
        .........(((..((((((...((((((((.....((((((((((...)))))).....

        TAAAAGTGTGACTTTTAGACCCTCATAACTTGAACAGGTTCAGACCTGCGTAGGGAAGTG
        (((((((...))))))).))))(((.....)))...)))).)))).))))))..)))..(

        GAGCGGTATTTGTGTTATTTTACTATGCCAATTCCAAACCACTTTTCCTTGCGGGAAAGT
        (((.(((((..(((......))).)))))..))))(((((((((((((....))))))))

        GGTTTTTTTA
        )))))....."#;

    /// Sequence expected from both `TENA` and `TENA_SHUFFLED`.
    const TENASEQ: &str = "GCAGAACAATTCAATATGTATTCGTTTAACCACTAGGGGTG\
        TCCTTCATAAGGGCTGAGATAAAAGTGTGACTTTTAGACCCTCATAACTTGAACAGGTTC\
        AGACCTGCGTAGGGAAGTGGAGCGGTATTTGTGTTATTTTACTATGCCAATTCCAAACCA\
        CTTTTCCTTGCGGGAAAGTGGTTTTTTTA";
    /// Secondary structure expected from both `TENA` and `TENA_SHUFFLED`.
    const TENASST: &str = ".........(((..((((((...((((((((.....(((((\
        (((((...)))))).....(((((((...))))))).))))(((.....)))...)))).\
        )))).))))))..)))..((((.(((((..(((......))).)))))..))))((((((\
        (((((((....))))))))))))).....";
    /// Name expected from both `TENA` and `TENA_SHUFFLED`.
    const TENANAME: &str = "TPP_riboswitch";

    /// Splits `test_rna` on newlines and runs the pieces through
    /// `ParsedInput::parse`, panicking if parsing fails.
    fn parse_helper(test_rna: &str) -> ParsedInput {
        let mut rows = test_rna.split('\n').map(String::from);
        ParsedInput::parse(&mut rows).expect("failed parsing input")
    }

    #[test]
    fn parse_simple_input() {
        let seq =
            "UUAUAGGCGAUGGAGUUCGCCAUAAACGCUGCUUAGCUAAUGACUCCUACCAGUAUCACUACUGGUAGGAGUCUAUUUUUUU";
        let sst =
            ".....(((((......)))))......(((....)))....((((((((((((((....)))))))))))))).........";
        let name = "super molecule";
        let name_out = "super_molecule";

        let expected = ParsedInput {
            sequence: Some(seq.to_string()),
            secondary_structure: Some(sst.to_string()),
            rna_name: None,
            highlight: None,
        };

        // Sequence line followed by structure line.
        let plain = format!("{}\n{}\n", seq, sst);
        assert_eq!(expected, parse_helper(&plain));

        // Structure first, surrounded by blank lines and stray whitespace.
        let swapped = format!("\n\n \t {} \t \n{}\n", sst, seq);
        assert_eq!(expected, parse_helper(&swapped));

        // Structure only: the sequence must come back as `None`.
        let expected_sst_only = ParsedInput {
            sequence: None,
            ..expected.clone()
        };
        let structure_only = format!("\n{}\n", sst);
        assert_eq!(expected_sst_only, parse_helper(&structure_only));

        // A `>` header supplies the name, with the space turned into `_`.
        let expected_named = ParsedInput {
            rna_name: Some(name_out.to_string()),
            ..expected
        };
        let named = format!("\n>{}\n\n{}\n{}\n", name, sst, seq);
        assert_eq!(expected_named, parse_helper(&named));
    }

    #[test]
    fn parse_multi_line() {
        let expected = ParsedInput {
            sequence: Some(TENASEQ.to_string()),
            secondary_structure: Some(TENASST.to_string()),
            rna_name: Some(TENANAME.to_string()),
            highlight: None,
        };

        // Block-wise and interleaved layouts must parse to the same result.
        for input in [TENA, TENA_SHUFFLED] {
            assert_eq!(expected, parse_helper(input));
        }
    }
}