use std::io::BufReader;
use std::path::Path;
use crate::core::contig::Contig;
use crate::core::header::QueryHeader;
use crate::parsing::sam::ParseError;
use crate::utils::validation::check_contig_limit;
/// Loads a FASTA index (`.fai`) file from `path` and turns it into a [`QueryHeader`].
///
/// The file is opened with buffering, decoded by the `noodles` FAI reader, and the
/// resulting index is handed to `index_to_query` for conversion.
///
/// # Errors
///
/// * An I/O error from opening the file is propagated with `?`, which converts it
///   into a [`ParseError`].
/// * A failure while reading the index is reported as [`ParseError::Noodles`] with
///   the underlying error rendered into the message.
/// * Any error produced by `index_to_query` is returned unchanged.
pub fn parse_fai_file(path: &Path) -> Result<QueryHeader, ParseError> {
    use noodles::fasta;

    let file = std::fs::File::open(path)?;
    let mut fai_reader = fasta::fai::io::Reader::new(BufReader::new(file));

    let parsed = match fai_reader.read_index() {
        Ok(index) => index,
        Err(e) => {
            return Err(ParseError::Noodles(format!(
                "Failed to parse FAI file: {e}"
            )))
        }
    };

    index_to_query(&parsed)
}
/// Converts the records of an already-parsed FASTA index into a [`QueryHeader`].
///
/// Each record contributes one [`Contig`] built from its name and length. Record
/// names are raw bytes; they are decoded lossily, so invalid UTF-8 sequences are
/// replaced rather than rejected.
///
/// # Errors
///
/// * [`ParseError::TooManyContigs`] when `check_contig_limit` returns `Some` for the
///   number of contigs gathered so far; the error carries that count.
/// * [`ParseError::InvalidFormat`] when the index holds no records at all.
fn index_to_query(index: &noodles::fasta::fai::Index) -> Result<QueryHeader, ParseError> {
    let mut collected = Vec::new();

    for entry in index.as_ref() {
        // The limit is consulted before the record is added, using the count so far.
        let seen = collected.len();
        if check_contig_limit(seen).is_some() {
            return Err(ParseError::TooManyContigs(seen));
        }

        let contig_name = String::from_utf8_lossy(entry.name()).into_owned();
        let contig_length = entry.length();
        collected.push(Contig::new(contig_name, contig_length));
    }

    if collected.is_empty() {
        Err(ParseError::InvalidFormat(
            "No contigs found in FAI file".to_string(),
        ))
    } else {
        Ok(QueryHeader::new(collected))
    }
}
/// Parses FAI-formatted text into a [`QueryHeader`], using only the first two
/// tab-separated columns (name and length) of every line.
///
/// Each line is trimmed first. Blank lines, lines starting with `#`, and lines with
/// fewer than two tab-separated fields are skipped without error. Columns after the
/// second are ignored.
///
/// # Errors
///
/// * [`ParseError::TooManyContigs`] when `check_contig_limit` returns `Some` for the
///   number of contigs gathered so far; the error carries that count.
/// * [`ParseError::InvalidFormat`] when the second column is not a valid `u64`, or
///   when no contig was found in the whole text.
pub fn parse_fai_text(text: &str) -> Result<QueryHeader, ParseError> {
    let mut parsed = Vec::new();

    let rows = text
        .lines()
        .map(str::trim)
        .filter(|row| !(row.is_empty() || row.starts_with('#')));

    for row in rows {
        let mut columns = row.split('\t');
        // Rows without both a name and a length column are skipped, not rejected.
        let (name, raw_length) = match (columns.next(), columns.next()) {
            (Some(name), Some(raw_length)) => (name, raw_length),
            _ => continue,
        };

        // The limit is consulted only for rows that would actually add a contig.
        let seen = parsed.len();
        if check_contig_limit(seen).is_some() {
            return Err(ParseError::TooManyContigs(seen));
        }

        let length = match raw_length.parse::<u64>() {
            Ok(value) => value,
            Err(_) => {
                return Err(ParseError::InvalidFormat(format!(
                    "Invalid length for contig '{}': {}",
                    name, raw_length
                )))
            }
        };

        parsed.push(Contig::new(name.to_string(), length));
    }

    if parsed.is_empty() {
        Err(ParseError::InvalidFormat(
            "No contigs found in FAI file".to_string(),
        ))
    } else {
        Ok(QueryHeader::new(parsed))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One FAI line with all five columns parsed; used only by the tests below.
    #[derive(Debug, Clone)]
    pub struct FaiEntry {
        /// Column 1, kept as text.
        pub name: String,
        /// Column 2, parsed as `u64`.
        pub length: u64,
        /// Column 3, parsed as `u64`.
        pub offset: u64,
        /// Column 4, parsed as `u32`.
        pub line_bases: u32,
        /// Column 5, parsed as `u32`.
        pub line_width: u32,
    }

    /// Test helper that parses every line of FAI text into a [`FaiEntry`].
    ///
    /// Lines are trimmed; blank lines and lines starting with `#` are skipped.
    /// Unlike `parse_fai_text`, a line with fewer than five tab-separated fields
    /// is an error rather than being skipped.
    ///
    /// # Errors
    ///
    /// * [`ParseError::InvalidFormat`] for a line with fewer than five fields, for
    ///   any numeric column that fails to parse, or when no entry was found.
    /// * [`ParseError::TooManyContigs`] when `check_contig_limit` returns `Some` for
    ///   the number of entries gathered so far.
    pub fn parse_fai_entries(text: &str) -> Result<Vec<FaiEntry>, ParseError> {
        let mut entries = Vec::new();
        for line in text.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            let fields: Vec<&str> = line.split('\t').collect();
            if fields.len() < 5 {
                return Err(ParseError::InvalidFormat(format!(
                    "FAI line has {} fields, expected 5: {}",
                    fields.len(),
                    line
                )));
            }
            // The limit is checked before the entry is added, using the count so far.
            if check_contig_limit(entries.len()).is_some() {
                return Err(ParseError::TooManyContigs(entries.len()));
            }
            let name = fields[0].to_string();
            let length: u64 = fields[1]
                .parse()
                .map_err(|_| ParseError::InvalidFormat(format!("Invalid length: {}", fields[1])))?;
            let offset: u64 = fields[2]
                .parse()
                .map_err(|_| ParseError::InvalidFormat(format!("Invalid offset: {}", fields[2])))?;
            let line_bases: u32 = fields[3].parse().map_err(|_| {
                ParseError::InvalidFormat(format!("Invalid line_bases: {}", fields[3]))
            })?;
            let line_width: u32 = fields[4].parse().map_err(|_| {
                ParseError::InvalidFormat(format!("Invalid line_width: {}", fields[4]))
            })?;
            entries.push(FaiEntry {
                name,
                length,
                offset,
                line_bases,
                line_width,
            });
        }
        if entries.is_empty() {
            return Err(ParseError::InvalidFormat(
                "No entries found in FAI file".to_string(),
            ));
        }
        Ok(entries)
    }

    #[test]
    fn test_parse_fai_text() {
        // Column separators are written as explicit `\t` escapes: `parse_fai_text`
        // splits on tabs only and skips lines with fewer than two fields, so a
        // fixture whose separators are (or get normalised to) spaces would yield
        // no contigs and make the `unwrap` below panic.
        let fai = "chr1\t248956422\t112\t70\t71\n\
                   chr2\t242193529\t253404903\t70\t71\n\
                   chrM\t16569\t3099922541\t70\t71\n";
        let query = parse_fai_text(fai).unwrap();
        assert_eq!(query.contigs.len(), 3);
        assert_eq!(query.contigs[0].name, "chr1");
        assert_eq!(query.contigs[0].length, 248_956_422);
        assert!(query.contigs[0].md5.is_none());
        assert_eq!(query.contigs[1].name, "chr2");
        assert_eq!(query.contigs[1].length, 242_193_529);
        assert_eq!(query.contigs[2].name, "chrM");
        assert_eq!(query.contigs[2].length, 16569);
    }

    #[test]
    fn test_parse_fai_entries() {
        let fai = "chr1\t248956422\t112\t70\t71\n";
        let entries = parse_fai_entries(fai).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "chr1");
        assert_eq!(entries[0].length, 248_956_422);
        assert_eq!(entries[0].offset, 112);
        assert_eq!(entries[0].line_bases, 70);
        assert_eq!(entries[0].line_width, 71);
    }

    #[test]
    fn test_parse_fai_empty() {
        // Empty input contains no contigs, which `parse_fai_text` reports as an error.
        let result = parse_fai_text("");
        assert!(result.is_err());
    }
}