1use std::{fs, io};
7use std::fs::File;
8use std::path::{Path, PathBuf};
9use crate::error::e_exit;
10use bio::io::{fasta, fastq, gff};
11use bio::io::gff::GffType;
12
/// File formats that can be recognised from a path's extension.
///
/// The common value traits are derived so results can be compared, printed
/// and copied freely by callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileType {
    /// Extension `fa`, `fasta` or `pep`.
    Fasta,
    /// Extension `fq` or `fastq`.
    Fastq,
    /// Extension `gff` or `gff3`.
    Gff,
    /// Missing, non-UTF-8 or unrecognised extension.
    Unknown,
}

impl FileType {
    /// Infers the file type from the extension of `path`.
    ///
    /// The extension is lowercased before matching, so `reads.FQ` and
    /// `reads.fq` are treated the same. Only the extension is inspected; the
    /// file itself is never opened.
    ///
    /// Returns [`FileType::Unknown`] when the path has no extension, the
    /// extension is not valid UTF-8, or it is not one of the known spellings.
    pub fn infer_file_type(path: &PathBuf) -> FileType {
        path.extension()
            .and_then(|ext| ext.to_str())
            .map_or(FileType::Unknown, |ext| {
                match ext.to_lowercase().as_str() {
                    "fa" | "fasta" | "pep" => FileType::Fasta,
                    "gff" | "gff3" => FileType::Gff,
                    "fq" | "fastq" => FileType::Fastq,
                    _ => FileType::Unknown,
                }
            })
    }
}
35
36
37pub struct MultiFormatWriter {
39 pub fa: fasta::Writer<File>,
40 pub fq: fastq::Writer<File>,
41 pub gff: gff::Writer<File>,
42}
43
44impl MultiFormatWriter {
45 pub fn new(path: &PathBuf) -> io::Result<Self> {
46 let file = File::create(path)?;
47 Ok(Self {
48 fa: fasta::Writer::new(file.try_clone()?),
49 gff: gff::Writer::new(file.try_clone()?, GffType::GFF3), fq: fastq::Writer::new(file),
51 })
52 }
53}
54
/// Maps the extension of `file` to a canonical format name.
///
/// Recognised extensions (matched case-insensitively):
/// `fasta`/`fa` -> `"fasta"`, `fastq`/`fq` -> `"fastq"`, `gff`/`gtf` -> `"gff"`,
/// and `bed`, `sam`, `bam` map to themselves.
///
/// # Errors
///
/// Returns an error instead of panicking when the path has no extension,
/// when the extension is not valid UTF-8, or when it is not one of the
/// recognised spellings.
pub fn try_file_type_ext(file: &Path) -> Result<String, Box<dyn std::error::Error>> {
    let ext = file
        .extension()
        .ok_or_else(|| format!("File has no extension: {}", file.display()))?
        .to_str()
        .ok_or_else(|| format!("File extension is not valid UTF-8: {}", file.display()))?;

    // Lowercase before matching so `.FA` and `.fa` agree, mirroring
    // `FileType::infer_file_type`; the error keeps the original spelling.
    let kind = match ext.to_lowercase().as_str() {
        "fasta" | "fa" => "fasta",
        "fastq" | "fq" => "fastq",
        "gff" | "gtf" => "gff",
        "bed" => "bed",
        "sam" => "sam",
        "bam" => "bam",
        _ => return Err(format!("Unknown file extension: {:?}", ext).into()),
    };

    Ok(kind.to_string())
}
69
/// Guesses whether `seq` is DNA, RNA or protein from the symbols it contains.
///
/// Matching is ASCII case-insensitive. The candidate alphabets are:
/// * DNA: `A T C G N`
/// * RNA: `A U C G`
/// * Protein: the 20 standard residue letters plus `B J O U X Z`
///
/// Returns one of `"DNA"`, `"RNA"`, `"Protein"` or `"Unknown sequence type"`.
/// When more than one alphabet fits the whole input, DNA takes precedence
/// over RNA, and RNA over protein.
///
/// Scanning stops as soon as the outcome is decided:
/// * when exactly one candidate remains, so later bytes are not validated;
/// * when no candidate remains, in which case the offending byte is logged
///   to stderr once and the result is `"Unknown sequence type"`.
///
/// An empty slice is logged to stderr as `Empty sequence` and reported as
/// `"Unknown sequence type"`.
pub fn try_seq_type_seq(seq: &[u8]) -> String {
    if seq.is_empty() {
        eprintln!("Empty sequence");
        // Nothing to inspect: without this return the untouched candidate
        // flags below would label an empty input as DNA.
        return "Unknown sequence type".into();
    }

    let (mut is_dna, mut is_rna, mut is_protein) = (true, true, true);
    for &c in seq {
        let c_upper = c.to_ascii_uppercase();

        // A candidate, once ruled out, stays ruled out.
        is_dna = is_dna && matches!(c_upper, b'A' | b'T' | b'C' | b'G' | b'N');
        is_rna = is_rna && matches!(c_upper, b'A' | b'U' | b'C' | b'G');
        is_protein = is_protein
            && matches!(
                c_upper,
                b'A' | b'R' | b'N' | b'D' | b'C' | b'E' | b'Q' | b'G' | b'H' | b'I'
                    | b'L' | b'K' | b'M' | b'F' | b'P' | b'S' | b'T' | b'W' | b'Y' | b'V'
                    | b'B' | b'J' | b'O' | b'U' | b'X' | b'Z'
            );

        if !(is_dna || is_rna || is_protein) {
            // This byte fits none of the remaining alphabets. Stop here so
            // the rest of the input is not reported as invalid as well.
            eprintln!("Invalid character: {}", c as char);
            break;
        }
        if only_one_true(is_dna, is_rna, is_protein) {
            break;
        }
    }

    if is_dna {
        "DNA".into()
    } else if is_rna {
        "RNA".into()
    } else if is_protein {
        "Protein".into()
    } else {
        "Unknown sequence type".into()
    }
}

/// Returns `true` when exactly one of the three flags is set.
fn only_one_true(a: bool, b: bool, c: bool) -> bool {
    (a as u8 + b as u8 + c as u8) == 1
}
163
/// Writes `content` to the file at `path`, replacing any existing contents.
///
/// # Panics
///
/// Panics with "Unable to write file" if the write fails.
pub fn write_file<P: AsRef<Path>>(path: P, content: &str) {
    let outcome = fs::write(path.as_ref(), content.as_bytes());
    outcome.expect("Unable to write file");
}
169
/// Heuristically decides whether `path` names a directory rather than a file.
///
/// The check looks only at the path text and never touches the filesystem.
/// A path is treated as a directory when:
/// * it has no extension,
/// * its extension is empty, or
/// * the path text ends with `.` (for example `out.d/.`).
///
/// Non-UTF-8 paths are handled with a lossy conversion instead of panicking;
/// a trailing `.` is ASCII and survives that conversion.
pub fn is_directory_path(path: &PathBuf) -> bool {
    match path.extension() {
        None => true,
        Some(ext) => ext.is_empty() || path.to_string_lossy().ends_with('.'),
    }
}
176
177pub fn create_file_with_dir(path: &Path) {
180 if let Some(parent) = path.parent() {
181 fs::create_dir_all(parent).unwrap_or_else(|e| {
182 e_exit("DIR", &format!("Unable to create directory: {}", e), 1);
183 });
184 }
185
186 File::create(path).unwrap_or_else(|e| {
187 e_exit("FILE", &format!("Unable to create file: {}", e), 1);
188 });
189}