1use anyhow::{bail, Context, Result};
2use atty::Stream;
3use gfa::optfields::{OptField, OptFieldVal::*};
4use petgraph::graph::NodeIndex;
5use std::collections::HashMap;
6use std::fmt;
7
8pub fn format_usize_to_kb(num: usize) -> String {
10 let div = num as f32 / 1000f32;
11 format!("{:.2}Kb", div)
12}
13
14pub fn is_stdin() -> bool {
16 !atty::is(Stream::Stdin)
17}
18
19pub fn get_edge_coverage(options: &[OptField]) -> Result<i64> {
21 if let Some(op) = options.iter().next() {
22 match op.tag {
23 [101, 99] => match op.value {
25 Int(i) => return Ok(i),
26 _ => bail!("Could not find integer ec:i:<i64> tag."),
27 },
28 _ => bail!("Could not find ec (edge coverage) tag."),
29 };
30 }
31 bail!("Edge coverage not found.")
32}
33
34pub fn get_option_string(options: Vec<OptField>) -> Result<String> {
36 let mut tag_val = String::new();
37 for op in options {
38 let tag = std::str::from_utf8(&op.tag)
39 .with_context(|| format!("Malformed UTF8: {:?}", op.tag))?;
40 let value = match op.value {
41 Float(f) => format!(":f:{:.3}", f),
42 A(a) => format!(":A:{}", a),
43 Int(i) => format!(":i:{}", i),
44 Z(z) => format!(
45 ":Z:{}",
46 std::str::from_utf8(&z).with_context(|| format!("Malformed UTF8: {:?}", z))?
47 ),
48 H(h) => format!(":H:{}", h.iter().map(|x| x.to_string()).collect::<String>()),
51 BInt(bi) => format!(
54 ":B:{}",
55 bi.iter().map(|x| x.to_string()).collect::<String>()
56 ),
57 BFloat(bf) => format!(
58 ":B:{}",
59 bf.iter().map(|x| format!("{:.3}", x)).collect::<String>()
60 ),
61 _ => "".to_string(),
62 };
63 tag_val += &format!("{}{}\t", tag, value);
64 }
65 let tag_val_op_un = tag_val
67 .strip_suffix('\t')
68 .context("Could not strip a tab from the suffix of the tag.")?;
69 Ok(tag_val_op_un.to_string())
70}
71
72pub fn parse_cigar(cigar: &[u8]) -> Result<usize> {
74 if !cigar.ends_with(&[77]) {
76 bail!(
77 "CIGAR string did not end with M: {}",
78 std::str::from_utf8(cigar).with_context(|| format!("Malformed UTF8: {:?}", cigar))?
79 );
80 }
81 let stripped = cigar.strip_suffix(&[77]);
82 match stripped {
83 Some(s) => {
84 let string_rep =
85 std::str::from_utf8(s).with_context(|| format!("Malformed UTF8: {:?}", s))?;
86 Ok(string_rep
87 .parse::<usize>()
88 .with_context(|| format!("{} could not be parsed to <usize>", string_rep))?)
89 }
90 None => bail!("Could not strip suffix (M) of the CIGAR string."),
91 }
92}
93
94pub fn reverse_complement(dna: &[u8]) -> Vec<u8> {
96 let dna_vec = dna.to_vec();
97 let mut revcomp = Vec::new();
98
99 for base in dna_vec.iter() {
100 revcomp.push(switch_base(*base))
101 }
102 revcomp.as_mut_slice().reverse();
103 revcomp
104}
105
106fn switch_base(c: u8) -> u8 {
108 match c {
109 b'A' => b'T',
110 b'a' => b't',
111 b'C' => b'G',
112 b'c' => b'g',
113 b'T' => b'A',
114 b't' => b'a',
115 b'G' => b'C',
116 b'g' => b'c',
117 b'N' => b'N',
118 b'n' => b'n',
119 _ => b'N',
120 }
121}
122
123fn nucleotide_counts(dna: &[u8]) -> HashMap<&u8, i32> {
128 let mut map = HashMap::new();
129 for nucleotide in dna {
130 let count = map.entry(nucleotide).or_insert(0);
131 *count += 1;
132 }
133 map
134}
135
136pub fn gc_content(dna: &[u8]) -> f32 {
138 let counts = nucleotide_counts(dna);
141 let g_counts = counts.get(&71).unwrap_or(&0) + counts.get(&103).unwrap_or(&0);
142 let c_counts = counts.get(&67).unwrap_or(&0) + counts.get(&99).unwrap_or(&0);
143 let a_counts = counts.get(&65).unwrap_or(&0) + counts.get(&97).unwrap_or(&0);
144 let t_counts = counts.get(&84).unwrap_or(&0) + counts.get(&116).unwrap_or(&0);
145
146 (g_counts + c_counts) as f32 / (g_counts + c_counts + a_counts + t_counts) as f32
147}
148
149#[derive(Clone, Copy, Debug)]
157pub struct GFAGraphPair {
158 pub node_index: NodeIndex,
160 pub seg_id: usize,
162}
163#[derive(Clone, Debug)]
167pub struct GFAGraphLookups(pub Vec<GFAGraphPair>);
168
169impl GFAGraphLookups {
170 pub fn new() -> Self {
172 Self(Vec::new())
173 }
174 pub fn push(&mut self, other: GFAGraphPair) {
176 self.0.push(other);
177 }
178
179 pub fn node_index_to_seg_id(&self, node_index: NodeIndex) -> Result<usize> {
181 let seg_id = &self
182 .0
183 .iter()
184 .find(|e| e.node_index == node_index)
185 .with_context(|| {
186 format!(
187 "Node index {:?} could not be converted to segment ID",
188 node_index
189 )
190 })?
191 .seg_id;
192
193 Ok(*seg_id)
194 }
195 pub fn seg_id_to_node_index(&self, seg_id: usize) -> Result<NodeIndex> {
197 let node_index = &self
198 .0
199 .iter()
200 .find(|e| e.seg_id == seg_id)
201 .with_context(|| {
202 format!(
203 "Segment ID {:?} could not be converted to NodeIndex",
204 seg_id
205 )
206 })?
207 .node_index;
208
209 Ok(*node_index)
210 }
211}
212
213impl fmt::Display for GFAGraphLookups {
214 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215 let mut output = String::new();
216 output += "\n\tSegment ID's:\n\t";
217
218 let mut seg_ids: String = self
219 .0
220 .iter()
221 .map(|pair| pair.seg_id.to_string() + ", ")
222 .collect();
223 seg_ids.drain(seg_ids.len() - 2..);
224
225 output += &seg_ids;
226
227 writeln!(f, "{}", output)
228 }
229}
230
231#[cfg(test)]
232mod tests {
233
234 use super::*;
235
236 #[test]
237 fn test_node_seg_id_indexes() {
238 let mut gl = GFAGraphLookups::new();
239 gl.push(GFAGraphPair {
241 node_index: NodeIndex::new(1),
242 seg_id: 12,
243 });
244
245 gl.push(GFAGraphPair {
246 node_index: NodeIndex::new(2),
247 seg_id: 10,
248 });
249
250 assert_eq!(12, gl.node_index_to_seg_id(NodeIndex::new(1)).unwrap());
251 assert_eq!(NodeIndex::new(2), gl.seg_id_to_node_index(10).unwrap());
252 }
253
254 #[test]
255 fn test_gc_content() {
256 let dna_bytes = vec![b'A', b'G', b'G', b'T', b'T', b'C'];
257
258 let gc = gc_content(&dna_bytes);
259
260 assert_eq!(gc, 0.5);
261 }
262
263 #[test]
264 fn test_cigar_parse() {
265 let cigar_ok = "120M".as_bytes();
266 let cigar_err = "30M10D20M5I10M".as_bytes();
267
268 let parsed_cigar = parse_cigar(cigar_ok).is_ok();
269 let parsed_cigar2 = parse_cigar(cigar_err).is_err();
270
271 assert!(parsed_cigar);
272 assert!(parsed_cigar2);
273 }
274}