use anyhow::{Context, Result};
use bio::io::fasta;
use std::path::{Path, PathBuf};
/// File extensions (lowercase, without the leading dot) that
/// `collect_fasta_files` accepts when scanning a directory; the comparison
/// there is ASCII-case-insensitive.
const FASTA_EXTS: &[&str] = &["fasta", "fa", "fna", "ffn", "faa"];
pub fn collect_fasta_files(input: &Path) -> Result<Vec<PathBuf>> {
if input.is_file() {
return Ok(vec![input.to_path_buf()]);
}
if !input.is_dir() {
anyhow::bail!("input {:?} is neither a file nor a directory", input);
}
let mut out = Vec::new();
for entry in std::fs::read_dir(input)
.with_context(|| format!("reading dir {:?}", input))?
{
let p = entry?.path();
if !p.is_file() {
continue;
}
if let Some(ext) = p.extension().and_then(|e| e.to_str()) {
let ext_lower = ext.to_ascii_lowercase();
if FASTA_EXTS.contains(&ext_lower.as_str()) {
out.push(p);
}
}
}
out.sort();
if out.is_empty() {
anyhow::bail!("no FASTA files found in {:?}", input);
}
Ok(out)
}
/// Reads every FASTA record from each path in `files` into one vector.
///
/// Files are processed in the order given, and records keep the order in
/// which the reader yields them within each file.
///
/// # Errors
///
/// Stops at the first failure and returns it, annotated with the offending
/// path: either the file could not be opened, or one of its records could
/// not be parsed.
pub fn load_all_records(files: &[PathBuf]) -> Result<Vec<fasta::Record>> {
    let mut records: Vec<fasta::Record> = Vec::new();
    for path in files.iter() {
        let opened = fasta::Reader::from_file(path);
        let reader = opened.with_context(|| format!("opening FASTA {:?}", path))?;
        // Push records one by one, aborting on the first parse error.
        reader.records().try_for_each(|parsed| -> Result<()> {
            let record = parsed.with_context(|| format!("parsing record in {:?}", path))?;
            records.push(record);
            Ok(())
        })?;
    }
    Ok(records)
}