1use std::fs::File;
2use std::io;
3use std::io::{BufRead, BufReader, Lines};
4use std::path::Path;
5
6use anyhow::{bail, Context, Result};
7use atty::{self, Stream};
8
9fn read_lines<P>(filename: P) -> Result<Lines<BufReader<File>>>
10where
11 P: AsRef<Path>,
12{
13 let file = File::open(filename)?;
14 Ok(BufReader::new(file).lines())
15}
16
/// The pieces of an input collected by [`ParsedInput::parse`].
///
/// Each field is `None` when the input contained no line of the matching kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedInput {
    /// Concatenation of every (trimmed) line whose first byte is an ASCII
    /// letter in `A..=U` or `a..=u`.
    pub sequence: Option<String>,
    /// Concatenation of every (trimmed) line whose first byte is `.`, `(`
    /// or `)`.
    pub secondary_structure: Option<String>,
    /// Text following the `>` of a header line, trimmed and with spaces
    /// replaced by underscores.
    pub rna_name: Option<String>,
    /// Concatenation of every (trimmed) line whose first byte is an ASCII
    /// digit. NOTE(review): the exact meaning of the digits is not visible
    /// here; presumably a per-position highlight mask, confirm with the consumer.
    pub highlight: Option<String>,
}
24
/// Maps an empty string to `None` and wraps any non-empty string in `Some`.
fn empty_then_none(s: String) -> Option<String> {
    Some(s).filter(|text| !text.is_empty())
}
32
33impl ParsedInput {
34 pub fn from_file(input_file: &str) -> Result<Self> {
36 let mut lines = read_lines(input_file)
37 .with_context(|| format!("could not read file: {input_file}"))?
38 .map(|x| x.expect("invalid utf8?"));
39 Self::parse(&mut lines)
40 }
41
42 pub fn from_pipe() -> Result<Self> {
44 if atty::is(Stream::Stdin) {
45 bail!("No input provided! nothing to do :c")
46 }
47
48 let stdin = io::stdin();
49 let mut lines = stdin.lock().lines().map(|x| x.expect("invalid utf8?"));
50 Self::parse(&mut lines)
51 }
52
53 pub fn parse<L>(lines: &mut L) -> Result<Self>
59 where
60 L: Iterator<Item = String>,
61 {
62 let mut sequence = String::with_capacity(300);
63 let mut secondary_structure = String::with_capacity(300);
64 let mut highlight = String::with_capacity(300);
65 let mut rna_name: Option<String> = None;
66
67 for line in lines {
68 let trimmed = line.trim();
69 if trimmed.is_empty() {
70 continue;
71 }
72 match &trimmed[0..1].as_bytes()[0] {
73 0x41..=0x55 | 0x61..=0x75 => sequence.push_str(trimmed), 0x2e | 0x28 | 0x29 => secondary_structure.push_str(trimmed), 0x30..=0x39 => highlight.push_str(trimmed), 0x3e => rna_name = Some(line[1..].trim().replace(' ', "_")), _ => continue,
78 }
79 }
80
81 Ok(ParsedInput {
82 sequence: empty_then_none(sequence),
83 secondary_structure: empty_then_none(secondary_structure),
84 highlight: empty_then_none(highlight),
85 rna_name,
86 })
87 }
88}
89
// Unit tests for `ParsedInput::parse`, driven through in-memory strings.
#[cfg(test)]
mod tests {
    use super::*;
    // Multi-line input: header, then all sequence lines, then all structure lines.
    const TENA: &str = r#">TPP_riboswitch
 GCAGAACAATTCAATATGTATTCGTTTAACCACTAGGGGTGTCCTTCATAAGGGCTGAGA
 TAAAAGTGTGACTTTTAGACCCTCATAACTTGAACAGGTTCAGACCTGCGTAGGGAAGTG
 GAGCGGTATTTGTGTTATTTTACTATGCCAATTCCAAACCACTTTTCCTTGCGGGAAAGT
 GGTTTTTTTA

 .........(((..((((((...((((((((.....((((((((((...)))))).....
 (((((((...))))))).))))(((.....)))...)))).)))).))))))..)))..(
 (((.(((((..(((......))).)))))..))))(((((((((((((....))))))))
 )))))....."#;
    // Same content as `TENA`, but with sequence and structure lines interleaved.
    const TENA_SHUFFLED: &str = r#">TPP_riboswitch
 GCAGAACAATTCAATATGTATTCGTTTAACCACTAGGGGTGTCCTTCATAAGGGCTGAGA
 .........(((..((((((...((((((((.....((((((((((...)))))).....

 TAAAAGTGTGACTTTTAGACCCTCATAACTTGAACAGGTTCAGACCTGCGTAGGGAAGTG
 (((((((...))))))).))))(((.....)))...)))).)))).))))))..)))..(

 GAGCGGTATTTGTGTTATTTTACTATGCCAATTCCAAACCACTTTTCCTTGCGGGAAAGT
 (((.(((((..(((......))).)))))..))))(((((((((((((....))))))))

 GGTTTTTTTA
 )))))....."#;

    // Expected sequence after the lines of `TENA` are joined.
    const TENASEQ: &str = "GCAGAACAATTCAATATGTATTCGTTTAACCACTAGGGGTG\
 TCCTTCATAAGGGCTGAGATAAAAGTGTGACTTTTAGACCCTCATAACTTGAACAGGTTC\
 AGACCTGCGTAGGGAAGTGGAGCGGTATTTGTGTTATTTTACTATGCCAATTCCAAACCA\
 CTTTTCCTTGCGGGAAAGTGGTTTTTTTA";
    // Expected secondary structure after the lines of `TENA` are joined.
    const TENASST: &str = ".........(((..((((((...((((((((.....(((((\
 (((((...)))))).....(((((((...))))))).))))(((.....)))...)))).\
 )))).))))))..)))..((((.(((((..(((......))).)))))..))))((((((\
 (((((((....))))))))))))).....";
    // Expected name taken from the `>` header line of `TENA`.
    const TENANAME: &str = "TPP_riboswitch";

    // Splits `test_rna` on newlines and parses it, panicking if parsing fails.
    fn parse_helper(test_rna: &str) -> ParsedInput {
        let mut lineiter = test_rna.split("\n").map(|x| x.to_string());
        ParsedInput::parse(&mut lineiter).expect("failed parsing input")
    }

    // Single-line sequence/structure inputs in several arrangements.
    #[test]
    fn parse_simple_input() {
        let seq =
            "UUAUAGGCGAUGGAGUUCGCCAUAAACGCUGCUUAGCUAAUGACUCCUACCAGUAUCACUACUGGUAGGAGUCUAUUUUUUU";
        let sst =
            ".....(((((......)))))......(((....)))....((((((((((((((....)))))))))))))).........";
        let name = "super molecule";
        // Spaces in the header are expected to become underscores.
        let name_out = "super_molecule";
        let correct_pi = ParsedInput {
            sequence: Some(seq.to_string()),
            secondary_structure: Some(sst.to_string()),
            rna_name: None,
            highlight: None,
        };

        // Sequence first, then structure.
        let test_rna = format!("{}\n{}\n", seq, sst);
        let pi = parse_helper(&test_rna);
        assert_eq!(correct_pi, pi);

        // Structure first, with blank lines and surrounding whitespace.
        let switched = format!("\n\n \t {} \t \n{}\n", sst, seq);
        let pi = parse_helper(&switched);
        assert_eq!(correct_pi, pi);

        // Structure only: the sequence must come back as `None`.
        let only_sst = format!("\n{}\n", sst);
        let pi = parse_helper(&only_sst);
        let only_sst_correct_pi = ParsedInput {
            sequence: None,
            ..correct_pi.clone()
        };
        assert_eq!(only_sst_correct_pi, pi);

        // With a `>` header line preceding structure and sequence.
        let with_name = format!("\n>{}\n\n{}\n{}\n", name, sst, seq);
        let pi = parse_helper(&with_name);
        let named_correct_pi = ParsedInput {
            rna_name: Some(name_out.to_string()),
            ..correct_pi
        };
        assert_eq!(named_correct_pi, pi);
    }

    // Sequence and structure split over several lines, grouped or interleaved,
    // must be joined into the same result.
    #[test]
    fn parse_multi_line() {
        let correct_pi = ParsedInput {
            sequence: TENASEQ.to_string().into(),
            secondary_structure: TENASST.to_string().into(),
            rna_name: TENANAME.to_string().into(),
            highlight: None,
        };

        let pi = parse_helper(TENA);
        assert_eq!(correct_pi, pi);

        let pi = parse_helper(TENA_SHUFFLED);
        assert_eq!(correct_pi, pi);
    }
}