orphos_core/sequence/
io.rs

1use crate::types::*;
2use bio::io::fasta;
3use std::fs::File;
4
/// One FASTA record as an `(id, description, sequence)` tuple.
///
/// * `.0` is the record identifier.
/// * `.1` is the optional description; it is `None` when the record has none.
/// * `.2` holds the sequence as raw bytes.
///
/// The alias exists to keep the return type of [`read_fasta_sequences`] readable.
pub type FastaRecord = (String, Option<String>, Vec<u8>);
8
9pub fn read_fasta_sequences(filename: &str) -> Result<Vec<FastaRecord>, OrphosError> {
10    let file = File::open(filename)?;
11    let reader = fasta::Reader::new(file);
12    let mut sequences = Vec::new();
13
14    for result in reader.records() {
15        let record = result.map_err(|e| OrphosError::ParseError(e.to_string()))?;
16        let id = record.id().to_string();
17        let description = record.desc().map(String::from);
18        let seq = record.seq().to_vec();
19        sequences.push((id, description, seq));
20    }
21
22    Ok(sequences)
23}
24
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;
    use std::fs;
    use std::path::PathBuf;

    /// Temporary FASTA fixture that deletes its file when dropped.
    ///
    /// Cleaning up in `Drop` means the file is removed even when an assertion
    /// panics halfway through a test.
    struct TempFasta {
        path: PathBuf,
    }

    impl TempFasta {
        /// Writes `contents` to a file in the system temp directory.
        ///
        /// The file name combines the process id with `label` so that
        /// concurrently running test processes do not overwrite each other's
        /// fixtures; `label` must be unique per test within this module.
        fn new(label: &str, contents: &[u8]) -> Self {
            let file_name = format!("orphos_io_{}_{}.fa", std::process::id(), label);
            let path = env::temp_dir().join(file_name);
            fs::write(&path, contents).expect("failed to write temporary FASTA fixture");
            Self { path }
        }

        /// Runs `read_fasta_sequences` on the fixture file.
        fn read(&self) -> Result<Vec<FastaRecord>, OrphosError> {
            let path = self
                .path
                .to_str()
                .expect("temporary path should be valid UTF-8");
            read_fasta_sequences(path)
        }
    }

    impl Drop for TempFasta {
        fn drop(&mut self) {
            // Best effort: a failed removal must not mask the test outcome.
            let _ = fs::remove_file(&self.path);
        }
    }

    #[test]
    fn test_read_fasta_sequences_basic() {
        let fixture = TempFasta::new("basic", b">test_sequence\nATCG\nGCTA\n");

        let sequences = fixture.read().expect("valid FASTA should parse");
        assert_eq!(sequences.len(), 1);
        assert_eq!(sequences[0].0, "test_sequence");
        // The two sequence lines are joined into a single sequence.
        assert_eq!(sequences[0].2.len(), 8);
        assert_eq!(sequences[0].2, b"ATCGGCTA".to_vec());
    }

    #[test]
    fn test_read_fasta_sequences_empty_file() {
        let fixture = TempFasta::new("empty", b"");

        let sequences = fixture.read().expect("empty file should parse");
        assert!(sequences.is_empty());
    }

    #[test]
    fn test_read_fasta_sequences_multiple() {
        let fixture = TempFasta::new("multi", b">seq1\nATCG\n>seq2\nGCTA\n>seq3\nTTAA\n");

        let sequences = fixture.read().expect("multi-record FASTA should parse");
        assert_eq!(sequences.len(), 3);
        assert_eq!(sequences[0].0, "seq1");
        assert_eq!(sequences[1].0, "seq2");
        assert_eq!(sequences[2].0, "seq3");
    }

    #[test]
    fn test_read_fasta_sequences_with_description() {
        let fixture = TempFasta::new(
            "desc",
            b">seq1 This is a test sequence\nATCG\n>seq2\nGCTA\n",
        );

        let sequences = fixture.read().expect("FASTA with description should parse");
        assert_eq!(sequences.len(), 2);
        assert_eq!(sequences[0].0, "seq1");
        assert_eq!(sequences[0].1, Some("This is a test sequence".to_string()));
        assert_eq!(sequences[1].1, None);
    }

    #[test]
    fn test_read_fasta_sequences_file_not_found() {
        let result = read_fasta_sequences("nonexistent_file.fa");
        assert!(
            matches!(result, Err(OrphosError::IoError(_))),
            "Expected IoError for missing file"
        );
    }

    #[test]
    fn test_read_fasta_sequences_invalid_format() {
        // Binary data that is neither valid UTF-8 nor starts with a '>' header.
        let fixture = TempFasta::new("invalid", &[0x00u8, 0xFF, 0x80]);

        // The file opens fine, so any failure must come from the record
        // reader and therefore surface as a ParseError.
        let result = fixture.read();
        assert!(
            matches!(result, Err(OrphosError::ParseError(_))),
            "Expected ParseError for non-FASTA binary input"
        );
    }

    #[test]
    fn test_fasta_record_type_alias() {
        // The alias must accept a plain (String, Option<String>, Vec<u8>) tuple.
        let record: FastaRecord = (
            "test".to_string(),
            Some("desc".to_string()),
            vec![65, 84, 67, 71],
        );
        assert_eq!(record.0, "test");
        assert_eq!(record.1, Some("desc".to_string()));
        assert_eq!(record.2, vec![65, 84, 67, 71]);
    }
}