1use std::path::Path;
4
5use cyanea_core::{CyaneaError, Result};
6use needletail::parse_fastx_file;
7
8#[derive(Debug, Clone)]
10#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
11pub struct FastaStats {
12 pub sequence_count: u64,
13 pub total_bases: u64,
14 pub gc_content: f64,
15 pub avg_length: f64,
16}
17
18pub fn parse_fasta_stats(path: impl AsRef<Path>) -> Result<FastaStats> {
20 let path = path.as_ref();
21 let mut reader = parse_fastx_file(path).map_err(|e| CyaneaError::Parse(e.to_string()))?;
22
23 let mut sequence_count: u64 = 0;
24 let mut total_bases: u64 = 0;
25 let mut gc_count: u64 = 0;
26
27 while let Some(record) = reader.next() {
28 let record = record.map_err(|e| CyaneaError::Parse(e.to_string()))?;
29 let seq = record.seq();
30
31 sequence_count += 1;
32 total_bases += seq.len() as u64;
33
34 for &base in seq.iter() {
35 match base {
36 b'G' | b'g' | b'C' | b'c' => gc_count += 1,
37 _ => {}
38 }
39 }
40 }
41
42 let gc_content = if total_bases > 0 {
43 (gc_count as f64 / total_bases as f64) * 100.0
44 } else {
45 0.0
46 };
47
48 let avg_length = if sequence_count > 0 {
49 total_bases as f64 / sequence_count as f64
50 } else {
51 0.0
52 };
53
54 Ok(FastaStats {
55 sequence_count,
56 total_bases,
57 gc_content,
58 avg_length,
59 })
60}
61
62#[cfg(test)]
63mod tests {
64 use super::*;
65 use std::io::Write;
66 use tempfile::NamedTempFile;
67
68 #[test]
69 fn test_fasta_parsing() {
70 let mut file = NamedTempFile::new().unwrap();
71 writeln!(file, ">seq1").unwrap();
72 writeln!(file, "ATCGATCG").unwrap();
73 writeln!(file, ">seq2").unwrap();
74 writeln!(file, "GCGCGCGC").unwrap();
75 file.flush().unwrap();
76
77 let stats = parse_fasta_stats(file.path()).unwrap();
78 assert_eq!(stats.sequence_count, 2);
79 assert_eq!(stats.total_bases, 16);
80 assert!((stats.gc_content - 75.0).abs() < 0.01);
81 assert!((stats.avg_length - 8.0).abs() < 0.01);
82 }
83
84 #[test]
85 fn test_empty_fasta() {
86 let file = NamedTempFile::new().unwrap();
87
88 let result = parse_fasta_stats(file.path());
90 assert!(result.is_err());
91 }
92
93 #[test]
94 fn test_fasta_file_not_found() {
95 let result = parse_fasta_stats("/nonexistent/file.fasta");
96 assert!(result.is_err());
97 }
98}