ref_solver/parsing/
fai.rs1use std::io::BufReader;
7use std::path::Path;
8
9use crate::core::contig::Contig;
10use crate::core::header::QueryHeader;
11use crate::parsing::sam::ParseError;
12use crate::utils::validation::check_contig_limit;
13
14pub fn parse_fai_file(path: &Path) -> Result<QueryHeader, ParseError> {
22 use noodles::fasta;
23
24 let reader = std::fs::File::open(path).map(BufReader::new)?;
25
26 let index = fasta::fai::io::Reader::new(reader)
27 .read_index()
28 .map_err(|e| ParseError::Noodles(format!("Failed to parse FAI file: {e}")))?;
29
30 index_to_query(&index)
31}
32
33fn index_to_query(index: &noodles::fasta::fai::Index) -> Result<QueryHeader, ParseError> {
35 let mut contigs = Vec::new();
36
37 for record in index.as_ref() {
38 if check_contig_limit(contigs.len()).is_some() {
40 return Err(ParseError::TooManyContigs(contigs.len()));
41 }
42
43 let name = String::from_utf8_lossy(record.name()).to_string();
44 let length = record.length();
45
46 contigs.push(Contig::new(name, length));
47 }
48
49 if contigs.is_empty() {
50 return Err(ParseError::InvalidFormat(
51 "No contigs found in FAI file".to_string(),
52 ));
53 }
54
55 Ok(QueryHeader::new(contigs))
56}
57
58pub fn parse_fai_text(text: &str) -> Result<QueryHeader, ParseError> {
65 let mut contigs = Vec::new();
66
67 for line in text.lines() {
68 let line = line.trim();
69 if line.is_empty() || line.starts_with('#') {
70 continue;
71 }
72
73 let fields: Vec<&str> = line.split('\t').collect();
74 if fields.len() < 2 {
75 continue;
76 }
77
78 if check_contig_limit(contigs.len()).is_some() {
80 return Err(ParseError::TooManyContigs(contigs.len()));
81 }
82
83 let name = fields[0].to_string();
84 let length: u64 = fields[1].parse().map_err(|_| {
85 ParseError::InvalidFormat(format!(
86 "Invalid length for contig '{}': {}",
87 name, fields[1]
88 ))
89 })?;
90
91 contigs.push(Contig::new(name, length));
92 }
93
94 if contigs.is_empty() {
95 return Err(ParseError::InvalidFormat(
96 "No contigs found in FAI file".to_string(),
97 ));
98 }
99
100 Ok(QueryHeader::new(contigs))
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// One FAI line with all five tab-separated columns parsed, in column order.
    #[derive(Debug, Clone)]
    pub struct FaiEntry {
        /// First column, kept verbatim.
        pub name: String,
        /// Second column.
        pub length: u64,
        /// Third column.
        pub offset: u64,
        /// Fourth column.
        pub line_bases: u32,
        /// Fifth column.
        pub line_width: u32,
    }

    /// Test helper that parses every column of each FAI line into a [`FaiEntry`].
    ///
    /// Lines are trimmed; blank lines and lines starting with `#` are skipped.
    /// Unlike `parse_fai_text`, a line with fewer than five columns is an error.
    ///
    /// # Errors
    ///
    /// * [`ParseError::InvalidFormat`] for a short line, a non-numeric column, or
    ///   text that yields no entries at all.
    /// * [`ParseError::TooManyContigs`] when `check_contig_limit` flags the number
    ///   of entries collected so far.
    pub fn parse_fai_entries(text: &str) -> Result<Vec<FaiEntry>, ParseError> {
        let mut entries = Vec::new();

        let rows = text
            .lines()
            .map(str::trim)
            .filter(|row| !row.is_empty() && !row.starts_with('#'));

        for row in rows {
            let columns: Vec<&str> = row.split('\t').collect();
            let (name, raw_length, raw_offset, raw_bases, raw_width) = match columns.as_slice() {
                &[name, length, offset, bases, width, ..] => (name, length, offset, bases, width),
                _ => {
                    return Err(ParseError::InvalidFormat(format!(
                        "FAI line has {} fields, expected 5: {}",
                        columns.len(),
                        row
                    )));
                }
            };

            let collected = entries.len();
            if check_contig_limit(collected).is_some() {
                return Err(ParseError::TooManyContigs(collected));
            }

            // Columns are validated left to right so the first bad one is reported.
            let length = match raw_length.parse::<u64>() {
                Ok(value) => value,
                Err(_) => {
                    return Err(ParseError::InvalidFormat(format!(
                        "Invalid length: {}",
                        raw_length
                    )));
                }
            };
            let offset = match raw_offset.parse::<u64>() {
                Ok(value) => value,
                Err(_) => {
                    return Err(ParseError::InvalidFormat(format!(
                        "Invalid offset: {}",
                        raw_offset
                    )));
                }
            };
            let line_bases = match raw_bases.parse::<u32>() {
                Ok(value) => value,
                Err(_) => {
                    return Err(ParseError::InvalidFormat(format!(
                        "Invalid line_bases: {}",
                        raw_bases
                    )));
                }
            };
            let line_width = match raw_width.parse::<u32>() {
                Ok(value) => value,
                Err(_) => {
                    return Err(ParseError::InvalidFormat(format!(
                        "Invalid line_width: {}",
                        raw_width
                    )));
                }
            };

            entries.push(FaiEntry {
                name: name.to_string(),
                length,
                offset,
                line_bases,
                line_width,
            });
        }

        if entries.is_empty() {
            Err(ParseError::InvalidFormat(
                "No entries found in FAI file".to_string(),
            ))
        } else {
            Ok(entries)
        }
    }

    /// Three well-formed lines produce three contigs with the right names and lengths.
    #[test]
    fn test_parse_fai_text() {
        // Tabs are written as escapes so the column separators stay visible.
        let text = concat!(
            "chr1\t248956422\t112\t70\t71\n",
            "chr2\t242193529\t253404903\t70\t71\n",
            "chrM\t16569\t3099922541\t70\t71\n",
        );

        let header = parse_fai_text(text).unwrap();
        assert_eq!(header.contigs.len(), 3);

        let first = &header.contigs[0];
        assert_eq!(first.name, "chr1");
        assert_eq!(first.length, 248_956_422);
        assert!(first.md5.is_none());

        let second = &header.contigs[1];
        assert_eq!(second.name, "chr2");
        assert_eq!(second.length, 242_193_529);

        let third = &header.contigs[2];
        assert_eq!(third.name, "chrM");
        assert_eq!(third.length, 16569);
    }

    /// A single five-column line is parsed into one entry with every field set.
    #[test]
    fn test_parse_fai_entries() {
        let text = "chr1\t248956422\t112\t70\t71\n";

        let parsed = parse_fai_entries(text).unwrap();
        assert_eq!(parsed.len(), 1);

        let entry = &parsed[0];
        assert_eq!(entry.name, "chr1");
        assert_eq!(entry.length, 248_956_422);
        assert_eq!(entry.offset, 112);
        assert_eq!(entry.line_bases, 70);
        assert_eq!(entry.line_width, 71);
    }

    /// Empty input has no contigs and must be rejected.
    #[test]
    fn test_parse_fai_empty() {
        assert!(parse_fai_text("").is_err());
    }
}