1use crate::index::FastaIndex;
4use crate::read::{FastaHandle, FastaReader};
5
6use std::collections::HashMap;
7use std::fs::File;
8use std::io::{BufRead, BufReader, BufWriter, Seek, SeekFrom, Write};
9use std::path::Path;
10
/// In-memory collection of FASTA records, keyed by record header/identifier.
///
/// Derives the common value-type traits so a map can be duplicated (`Clone`), created empty
/// (`Default`) and compared with full equivalence semantics (`Eq`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FastaMap {
    /// Maps each record's header/identifier to its sequence.
    pub id_to_seq: HashMap<String, String>,
}
16
17impl FastaMap {
18 pub fn from_fasta(path: &Path) -> Self {
19 let reader = FastaReader::new(path);
20 let mut entries: HashMap<String, String> = HashMap::new();
21 for [header, seq] in reader {
22 entries.insert(header, seq);
23 }
24 FastaMap { id_to_seq: entries }
25 }
26
27 pub fn from_index_with_ids(path: &Path, index: &FastaIndex, ids: &[String]) -> Self {
28 let mut res = HashMap::new();
29 let mut fasta_handle = FastaHandle::open_fasta(path);
30 if let FastaHandle::Compressed(_) = fasta_handle {
31 panic!(
32 "Tried to use index on non seekable compressed file: {:?}",
33 path
34 );
35 }
36
37 for k in ids {
38 if let Some(v) = index.id_to_offset.get(k) {
39 let mut seq_buf = String::new();
40 fasta_handle
41 .seek(SeekFrom::Start(*v))
42 .expect("File seek failed in `from_index_with_ids`.");
43
44 let mut seen_header = false;
45 for line in BufReader::new(&mut fasta_handle).lines() {
46 let lstring = line.unwrap();
47 if lstring.starts_with('>') {
48 if seen_header {
49 break;
50 } else {
51 seen_header = true;
52 }
53 } else if lstring == "" {
54 break;
55 } else {
56 seq_buf.push_str(&lstring);
57 }
58 }
59 res.insert((*k).to_string(), seq_buf);
60 }
61 }
62 FastaMap { id_to_seq: res }
63 }
64
65 pub fn to_fasta(&self, path: &Path) {
66 let mut f = match File::create(path) {
67 Err(why) => panic!("couldn't create {:?}: {:?}", path, why),
68 Ok(file) => BufWriter::new(file),
69 };
70 for (k, v) in self.id_to_seq.iter() {
71 if let Err(why) = f.write_all(format!(">{}\n", k).as_bytes()) {
72 panic!("couldn't write to {:?}: {:?}", path, why)
73 };
74 if let Err(why) = f.write_all(format!("{}\n\n", v).as_bytes()) {
75 panic!("couldn't write to {:?}: {:?}", path, why)
76 };
77 }
78 }
79}
80
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the short-description fixture and checks that the resulting map holds exactly
    /// the three expected header/sequence pairs.
    #[test]
    fn fasta_map_from_fasta() {
        // Expected records as (header, full sequence) pairs.
        let records = [
            (
                ">Q2HZH0",
                "MATVPEPTSEMMSYYYSDNENDLFFEADGPRKMKCCFQDLNNSSLKDEGIQLHISHQLQN\
                 KSLRHFVSVVVALEKLKKISLPCSQPLQDDDLKNVFCCIFEEEPIVCEVYDDDAFVCDAP",
            ),
            (
                ">P93158",
                "TLKVPVHVPSPSEDAEWQLRKAFEGWGTNEQLIIDILAHRNAAQRNSIRKVYGEAYGEDL\
                 LKCLEKELTSDFERAVLLFTLDPAERDAHLANEATKKFTSSNWILMEIACSRSSHELLNV",
            ),
            (
                ">H0VS30",
                "MEAAAAAPRHQLLLLMLVAAAATLLPGAKALQCFCQLCAKDNYTCVTDGLCFVSITETTD\
                 RIIHNTMCIAEIDLIPRDRPFVCAPSSKTGAVTTTHCCNQDHCNKIELPTTEKQSSGLGP\
                 VELAAVIAGPVCFVCISLMLMVYICHNRTVIHHRVPNEEDPSLDRPFISEGTTLKDLIYD",
            ),
        ];

        let expected: HashMap<String, String> = records
            .iter()
            .map(|&(id, seq)| (id.to_string(), seq.to_string()))
            .collect();

        let parsed = FastaMap::from_fasta(Path::new("./resources/test_short_descr.fasta"));

        assert_eq!(FastaMap { id_to_seq: expected }, parsed);
    }
}