use std::{fs, io};
use std::fs::File;
use std::path::{Path, PathBuf};
use crate::error::e_exit;
use bio::io::{fasta, fastq, gff};
use bio::io::gff::GffType;
/// Sequence/annotation file formats recognised from a file name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileType {
    /// `.fa`, `.fasta` or `.pep`.
    Fasta,
    /// `.fq` or `.fastq`.
    Fastq,
    /// `.gff` or `.gff3`.
    Gff,
    /// Missing, non-UTF-8 or unrecognised extension.
    Unknown,
}

impl FileType {
    /// Infers the file type from the extension of `path`.
    ///
    /// The comparison is case-insensitive. The file itself is never opened,
    /// so only the spelling of the path matters. Paths without an extension,
    /// with a non-UTF-8 extension, or with an extension that is not listed on
    /// the variants yield [`FileType::Unknown`].
    pub fn infer_file_type(path: &PathBuf) -> FileType {
        let ext = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_lowercase);

        match ext.as_deref() {
            Some("fa" | "fasta" | "pep") => FileType::Fasta,
            Some("gff" | "gff3") => FileType::Gff,
            Some("fq" | "fastq") => FileType::Fastq,
            _ => FileType::Unknown,
        }
    }
}
/// A FASTA, a FASTQ and a GFF3 writer that all target the same output file.
///
/// NOTE(review): the three writers are built on handles cloned from one
/// `File`, so they share a single file position. If the `bio` writers buffer
/// internally, interleaving output from more than one of them may not land in
/// call order — confirm against the callers.
pub struct MultiFormatWriter {
    /// FASTA record writer.
    pub fa: fasta::Writer<File>,
    /// FASTQ record writer.
    pub fq: fastq::Writer<File>,
    /// GFF writer, configured for GFF3.
    pub gff: gff::Writer<File>,
}

impl MultiFormatWriter {
    /// Creates (or truncates) the file at `path` and wraps it in one writer
    /// per supported format.
    ///
    /// # Errors
    /// Returns the underlying I/O error if the file cannot be created or its
    /// handle cannot be duplicated.
    pub fn new(path: &PathBuf) -> io::Result<Self> {
        let file = File::create(path)?;

        // The two cloned handles are taken first; the original handle is
        // moved into the FASTQ writer last.
        let fa = fasta::Writer::new(file.try_clone()?);
        let gff = gff::Writer::new(file.try_clone()?, GffType::GFF3);
        let fq = fastq::Writer::new(file);

        Ok(Self { fa, fq, gff })
    }
}
/// Maps the extension of `file` to a canonical format name.
///
/// Recognised extensions (matched case-sensitively) and their results:
/// `fasta`/`fa` -> `"fasta"`, `fastq`/`fq` -> `"fastq"`, `gff`/`gtf` -> `"gff"`,
/// `bed` -> `"bed"`, `sam` -> `"sam"`, `bam` -> `"bam"`.
///
/// # Errors
/// Returns an error when the path has no extension, when the extension is not
/// valid UTF-8, or when the extension is not one of those listed above.
pub fn try_file_type_ext(file: &Path) -> Result<String, Box<dyn std::error::Error>> {
    // A missing or undecodable extension is an ordinary failure for a
    // `Result`-returning function, not a reason to panic.
    let ext = file
        .extension()
        .ok_or_else(|| format!("File has no extension: {}", file.display()))?
        .to_str()
        .ok_or_else(|| format!("File extension is not valid UTF-8: {}", file.display()))?;

    let kind = match ext {
        "fasta" | "fa" => "fasta",
        "fastq" | "fq" => "fastq",
        "gff" | "gtf" => "gff",
        "bed" => "bed",
        "sam" => "sam",
        "bam" => "bam",
        _ => return Err(format!("Unknown file extension: {:?}", ext).into()),
    };
    Ok(kind.to_string())
}
/// Guesses whether `seq` is DNA, RNA or protein from the characters it contains.
///
/// Characters are compared case-insensitively against three alphabets:
/// DNA (`A T C G N`), RNA (`A U C G`) and protein (every ASCII letter).
/// Each alphabet stays a candidate until a character outside it is seen.
///
/// Returns `"DNA"`, `"RNA"`, `"Protein"` or `"Unknown sequence type"`. When
/// several candidates survive, DNA is preferred over RNA, and RNA over protein.
///
/// Scanning stops as soon as at most one candidate is left, so characters after
/// that point are not validated. An empty sequence, or one containing a
/// character that belongs to none of the remaining alphabets, is reported on
/// stderr and yields `"Unknown sequence type"`.
pub fn try_seq_type_seq(seq: &[u8]) -> String {
    if seq.is_empty() {
        eprintln!("Empty sequence");
        // Nothing was observed, so no alphabet can be claimed.
        return "Unknown sequence type".into();
    }

    let (mut is_dna, mut is_rna, mut is_protein) = (true, true, true);
    for &c in seq {
        let upper = c.to_ascii_uppercase();

        // An alphabet that has already been ruled out is never reinstated.
        is_dna = is_dna && matches!(upper, b'A' | b'T' | b'C' | b'G' | b'N');
        is_rna = is_rna && matches!(upper, b'A' | b'U' | b'C' | b'G');
        // The accepted amino-acid codes are exactly the 26 letters A-Z.
        is_protein = is_protein && upper.is_ascii_uppercase();

        if !(is_dna || is_rna || is_protein) {
            // No candidate accepts this character; the verdict cannot change
            // any more, so report the offender once and stop.
            eprintln!("Invalid character: {}", c as char);
            break;
        }
        if only_one_true(is_dna, is_rna, is_protein) {
            break;
        }
    }

    let label = if is_dna {
        "DNA"
    } else if is_rna {
        "RNA"
    } else if is_protein {
        "Protein"
    } else {
        "Unknown sequence type"
    };
    label.into()
}

/// Returns `true` when exactly one of the three flags is set.
fn only_one_true(a: bool, b: bool, c: bool) -> bool {
    u8::from(a) + u8::from(b) + u8::from(c) == 1
}
/// Writes `content` to the file at `path`, creating it if necessary and
/// replacing any previous contents.
///
/// # Panics
/// Panics with the message "Unable to write file" if the write fails.
pub fn write_file<P: AsRef<Path>>(path: P, content: &str) {
    let outcome = fs::write(path, content);
    outcome.expect("Unable to write file");
}
/// Guesses from its spelling alone whether `path` names a directory.
///
/// The file system is never consulted. The path is treated as a directory when
/// it has no extension, when its extension is empty, or when its textual form
/// ends with `.`. Any path with a non-empty extension that does not end in `.`
/// is treated as a file.
pub fn is_directory_path(path: &PathBuf) -> bool {
    match path.extension() {
        None => true,
        // The lossy conversion keeps non-UTF-8 paths from panicking. It is
        // exact for this check because a trailing ASCII '.' always survives
        // the conversion unchanged.
        Some(ext) => ext.is_empty() || path.to_string_lossy().ends_with('.'),
    }
}
pub fn create_file_with_dir(path: &Path) {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).unwrap_or_else(|e| {
e_exit("DIR", &format!("Unable to create directory: {}", e), 1);
});
}
File::create(path).unwrap_or_else(|e| {
e_exit("FILE", &format!("Unable to create file: {}", e), 1);
});
}