fasta/
map.rs

1//! A HashMap representation of a FASTA file.
2
3use crate::index::FastaIndex;
4use crate::read::{FastaHandle, FastaReader};
5
6use std::collections::HashMap;
7use std::fs::File;
8use std::io::{BufRead, BufReader, BufWriter, Seek, SeekFrom, Write};
9use std::path::Path;
10
/// A `HashMap` representation of a FASTA file.
///
/// Every record is stored as one entry of [`FastaMap::id_to_seq`]: the key
/// identifies the record and the value is its sequence as a single string.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FastaMap {
    /// Maps a record identifier to the sequence belonging to it.
    pub id_to_seq: HashMap<String, String>,
}
16
17impl FastaMap {
18    pub fn from_fasta(path: &Path) -> Self {
19        let reader = FastaReader::new(path);
20        let mut entries: HashMap<String, String> = HashMap::new();
21        for [header, seq] in reader {
22            entries.insert(header, seq);
23        }
24        FastaMap { id_to_seq: entries }
25    }
26
27    pub fn from_index_with_ids(path: &Path, index: &FastaIndex, ids: &[String]) -> Self {
28        let mut res = HashMap::new();
29        let mut fasta_handle = FastaHandle::open_fasta(path);
30        if let FastaHandle::Compressed(_) = fasta_handle {
31            panic!(
32                "Tried to use index on non seekable compressed file: {:?}",
33                path
34            );
35        }
36
37        for k in ids {
38            if let Some(v) = index.id_to_offset.get(k) {
39                let mut seq_buf = String::new();
40                fasta_handle
41                    .seek(SeekFrom::Start(*v))
42                    .expect("File seek failed in `from_index_with_ids`.");
43
44                let mut seen_header = false;
45                for line in BufReader::new(&mut fasta_handle).lines() {
46                    let lstring = line.unwrap();
47                    if lstring.starts_with('>') {
48                        if seen_header {
49                            break;
50                        } else {
51                            seen_header = true;
52                        }
53                    } else if lstring == "" {
54                        break;
55                    } else {
56                        seq_buf.push_str(&lstring);
57                    }
58                }
59                res.insert((*k).to_string(), seq_buf);
60            }
61        }
62        FastaMap { id_to_seq: res }
63    }
64
65    pub fn to_fasta(&self, path: &Path) {
66        let mut f = match File::create(path) {
67            Err(why) => panic!("couldn't create {:?}: {:?}", path, why),
68            Ok(file) => BufWriter::new(file),
69        };
70        for (k, v) in self.id_to_seq.iter() {
71            if let Err(why) = f.write_all(format!(">{}\n", k).as_bytes()) {
72                panic!("couldn't write to {:?}: {:?}", path, why)
73            };
74            if let Err(why) = f.write_all(format!("{}\n\n", v).as_bytes()) {
75                panic!("couldn't write to {:?}: {:?}", path, why)
76            };
77        }
78    }
79}
80
#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing the short-description fixture must produce exactly the three
    /// records listed below, keyed by their `>`-prefixed headers.
    #[test]
    fn fasta_map_from_fasta() {
        let records = [
            (
                ">Q2HZH0",
                "MATVPEPTSEMMSYYYSDNENDLFFEADGPRKMKCCFQDLNNSSLKDEGIQLHISHQLQN\
                KSLRHFVSVVVALEKLKKISLPCSQPLQDDDLKNVFCCIFEEEPIVCEVYDDDAFVCDAP",
            ),
            (
                ">P93158",
                "TLKVPVHVPSPSEDAEWQLRKAFEGWGTNEQLIIDILAHRNAAQRNSIRKVYGEAYGEDL\
                LKCLEKELTSDFERAVLLFTLDPAERDAHLANEATKKFTSSNWILMEIACSRSSHELLNV",
            ),
            (
                ">H0VS30",
                "MEAAAAAPRHQLLLLMLVAAAATLLPGAKALQCFCQLCAKDNYTCVTDGLCFVSITETTD\
                RIIHNTMCIAEIDLIPRDRPFVCAPSSKTGAVTTTHCCNQDHCNKIELPTTEKQSSGLGP\
                VELAAVIAGPVCFVCISLMLMVYICHNRTVIHHRVPNEEDPSLDRPFISEGTTLKDLIYD",
            ),
        ];

        let expected: HashMap<String, String> = records
            .iter()
            .map(|&(header, seq)| (header.to_owned(), seq.to_owned()))
            .collect();

        let parsed = FastaMap::from_fasta(Path::new("./resources/test_short_descr.fasta"));

        assert_eq!(FastaMap { id_to_seq: expected }, parsed);
    }
}