use alloc::format;
use core::marker::Copy;
use crate::error::EtError;
/// The kinds of file this crate can recognize.
///
/// A value is normally obtained from [`FileType::from_magic`] (content
/// sniffing) or [`FileType::from_extension`] (file name), and can then be
/// turned into a parser name with [`FileType::to_parser_name`]. Not every
/// variant has a parser.
///
/// Variant order is kept stable; new variants should be added with care since
/// the implicit discriminants follow declaration order.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileType {
    // Compression containers
    Gzip,
    Bzip,
    Lzma,
    Zstd,

    // Sequencing and cytometry formats
    Bam,
    Fasta,
    Fastq,
    Facs,
    Sam,
    Scf,
    Ztr,

    // Instrument vendor formats (Agilent, Bruker, Inficon, Thermo, Waters)
    AgilentMsMsScan,
    AgilentChemstationDad,
    AgilentChemstationFid,
    AgilentChemstationMs,
    AgilentChemstationMwd,
    AgilentChemstationUv,
    AgilentMasshunterDad,
    /// The `.sd` companion of a MassHunter DAD file; recognized, but
    /// [`FileType::to_parser_name`] refuses to parse it directly.
    AgilentMasshunterDadHeader,
    BrukerBaf,
    BrukerMsms,
    InficonHapsite,
    ThermoRaw,
    ThermoCf,
    ThermoDxf,
    WatersAutospec,

    // Open interchange formats
    NetCdf,
    MzXml,
    Las,

    // Images
    Dicom,
    Gif,
    Jpeg,
    Png,

    // General-purpose data stores
    Hdf5,
    ApacheAvro,
    ApacheParquet,
    Sqlite,
    DelimitedText,

    /// Fallback used when neither the content nor the extension is recognized.
    Unknown,
}
impl FileType {
#[must_use]
pub fn from_magic(magic: &[u8]) -> FileType {
if magic.len() > 8 {
match &magic[..8] {
b"FCS2.0 " | b"FCS3.0 " | b"FCS3.1 " => return FileType::Facs,
b"~VERSION" | b"~Version" => return FileType::Las,
b"\x89PNG\r\n\x1A\n" => return FileType::Png,
b"\x89HDF\r\n\x1A\n" => return FileType::Hdf5,
b"\x04\x03\x02\x01SPAH" => return FileType::InficonHapsite,
b"\xAEZTR\x0D\x0A\x1A\x0A" => return FileType::Ztr,
b"\x01\xA1F\x00i\x00n\x00" => return FileType::ThermoRaw,
b"SQLite f" => return FileType::Sqlite,
_ => {}
}
}
if magic.len() > 4 {
match &magic[..4] {
b"BAM\x01" => return FileType::Bam,
b"DICM" => return FileType::Dicom,
b"GIF8" => return FileType::Gif,
b"@HD\t" | b"@SQ\t" => return FileType::Sam,
b"PAR1" => return FileType::ApacheParquet,
b"\x2Escf" => return FileType::Scf,
b"\x01\x32\x00\x00" => return FileType::AgilentChemstationMs,
b"\x02\x02\x00\x00" => return FileType::AgilentMasshunterDadHeader,
b"\x02\x33\x30\x00" => return FileType::AgilentChemstationMwd,
b"\x02\x33\x31\x00" => return FileType::AgilentChemstationDad,
b"\x02\x38\x31\x00" => return FileType::AgilentChemstationFid,
b"\x03\x02\x00\x00" => return FileType::AgilentMasshunterDad,
b"\x03\x31\x33\x31" => return FileType::AgilentChemstationUv,
b"\x28\xB5\x2F\xFD" => return FileType::Zstd,
b"\x4F\x62\x6A\x01" => return FileType::ApacheAvro,
b"\xFF\xD8\xFF\xDB" | b"\xFF\xD8\xFF\xE0" | b"\xFF\xD8\xFF\xE1"
| b"\xFF\xD8\xFF\xEE" => return FileType::Jpeg,
[0xFF, 0xFF, 0x06 | 0x05, 0x00] => {
if magic.len() >= 78 && &magic[52..64] == b"C\x00I\x00s\x00o\x00G\x00C\x00" {
return FileType::ThermoCf;
}
return FileType::ThermoDxf;
}
_ => {}
}
}
if magic.len() < 2 {
return FileType::Unknown;
}
match &magic[..2] {
[0x0F | 0x1F, 0x8B] => return FileType::Gzip,
[0x42, 0x5A] => return FileType::Bzip,
[0xFD, 0x37] => return FileType::Lzma,
[0x24, 0x00] => return FileType::BrukerBaf,
[0x43, 0x44] => return FileType::NetCdf,
_ => {}
}
match &magic[..1] {
b">" => FileType::Fasta,
b"@" => FileType::Fastq,
_ => FileType::Unknown,
}
}
#[must_use]
pub fn from_extension(ext: &str) -> &[Self] {
match ext {
"ami" => &[FileType::BrukerMsms],
"avro" => &[FileType::ApacheAvro],
"baf" => &[FileType::BrukerBaf],
"bam" => &[FileType::Bam],
"bz" | "bz2" | "bzip" => &[FileType::Bzip],
"cdf" => &[FileType::NetCdf],
"cf" => &[FileType::ThermoCf],
"ch" => &[
FileType::AgilentChemstationFid,
FileType::AgilentChemstationMwd,
],
"csv" | "tsv" => &[FileType::DelimitedText],
"dicm" => &[FileType::Dicom],
"dxf" => &[FileType::ThermoDxf],
"fa" | "faa" | "fasta" | "fna" => &[FileType::Fasta],
"faq" | "fastq" | "fq" => &[FileType::Fastq],
"fcs" | "lmd" => &[FileType::Facs],
"gif" => &[FileType::Gif],
"gz" | "gzip" => &[FileType::Gzip],
"hdf" => &[FileType::Hdf5],
"hps" => &[FileType::InficonHapsite],
"idx" => &[FileType::WatersAutospec],
"jpg" | "jpeg" => &[FileType::Jpeg],
"ms" => &[FileType::AgilentChemstationMs],
"mzxml" => &[FileType::MzXml],
"png" => &[FileType::Png],
"raw" => &[FileType::ThermoRaw],
"sam" => &[FileType::Sam],
"scf" => &[FileType::Scf],
"sd" => &[FileType::AgilentMasshunterDadHeader],
"sp" => &[FileType::AgilentMasshunterDad],
"sqlite" => &[FileType::Sqlite],
"uv" => &[
FileType::AgilentChemstationDad,
FileType::AgilentChemstationUv,
],
"xz" => &[FileType::Lzma],
"zstd" => &[FileType::Zstd],
"ztr" => &[FileType::Ztr],
_ => &[FileType::Unknown],
}
}
pub fn to_parser_name<'a>(&self, hint: Option<&'a str>) -> Result<&'a str, EtError> {
Ok(match (self, hint) {
(FileType::AgilentChemstationDad, None) => "chemstation_dad",
(FileType::AgilentChemstationFid, None) => "chemstation_fid",
(FileType::AgilentChemstationMs, None) => "chemstation_ms",
(FileType::AgilentChemstationMwd, None) => "chemstation_mwd",
(FileType::AgilentChemstationUv, None) => "chemstation_uv",
(FileType::AgilentMasshunterDad, None) => "masshunter_dad",
(FileType::AgilentMasshunterDadHeader, None) => return Err("Reading the \".sd\" file is unsupported. Please open the \".sp\" data file instead".into()),
(FileType::Bam, None) => "bam",
(FileType::Fasta, None) => "fasta",
(FileType::Fastq, None) => "fastq",
(FileType::Facs, None) => "flow",
(FileType::InficonHapsite, None) => "inficon_hapsite",
(FileType::Png, None) => "png",
(FileType::Sam, None) => "sam",
(FileType::ThermoCf, None) => "thermo_cf",
(FileType::ThermoDxf, None) => "thermo_dxf",
(FileType::ThermoRaw, None) => "thermo_raw",
(FileType::DelimitedText, None) => "tsv",
(_, Some(x)) => x,
(x, _) => return Err(format!("{:?} doesn't have a parser", x).into())
})
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every file type with a built-in parser resolves to its parser name
    /// when no hint is given.
    #[test]
    fn test_parser_names() {
        let filetypes = [
            (FileType::AgilentChemstationDad, "chemstation_dad"),
            (FileType::AgilentChemstationFid, "chemstation_fid"),
            (FileType::AgilentChemstationMs, "chemstation_ms"),
            (FileType::AgilentChemstationMwd, "chemstation_mwd"),
            (FileType::AgilentChemstationUv, "chemstation_uv"),
            (FileType::AgilentMasshunterDad, "masshunter_dad"),
            (FileType::Bam, "bam"),
            (FileType::Fasta, "fasta"),
            (FileType::Fastq, "fastq"),
            (FileType::Facs, "flow"),
            (FileType::InficonHapsite, "inficon_hapsite"),
            (FileType::Png, "png"),
            (FileType::Sam, "sam"),
            (FileType::ThermoCf, "thermo_cf"),
            (FileType::ThermoDxf, "thermo_dxf"),
            (FileType::ThermoRaw, "thermo_raw"),
            (FileType::DelimitedText, "tsv"),
        ];
        for (ft, parser) in filetypes {
            assert_eq!(ft.to_parser_name(None).unwrap(), parser);
        }
    }

    /// An explicit hint is returned as-is, whatever the detected type.
    #[test]
    fn test_parser_hint_overrides_type() {
        assert_eq!(
            FileType::Unknown.to_parser_name(Some("fasta")).unwrap(),
            "fasta"
        );
        assert_eq!(FileType::Bam.to_parser_name(Some("sam")).unwrap(), "sam");
        assert_eq!(
            FileType::AgilentMasshunterDadHeader
                .to_parser_name(Some("custom"))
                .unwrap(),
            "custom"
        );
    }

    /// Types without a parser produce an error when no hint is supplied.
    #[test]
    fn test_types_without_parser_error() {
        assert!(FileType::Gzip.to_parser_name(None).is_err());
        assert!(FileType::Unknown.to_parser_name(None).is_err());
        assert!(FileType::AgilentMasshunterDadHeader
            .to_parser_name(None)
            .is_err());
    }

    /// Content sniffing on buffers longer than the signature being matched.
    #[test]
    fn test_from_magic() {
        assert_eq!(FileType::from_magic(b"\x89PNG\r\n\x1A\n\x00"), FileType::Png);
        assert_eq!(FileType::from_magic(b"BAM\x01\x00"), FileType::Bam);
        assert_eq!(FileType::from_magic(b"\x1F\x8B\x08"), FileType::Gzip);
        assert_eq!(FileType::from_magic(b">seq\nACGT"), FileType::Fasta);
        assert_eq!(FileType::from_magic(b"@read\nACGT"), FileType::Fastq);
        assert_eq!(FileType::from_magic(b""), FileType::Unknown);
        assert_eq!(FileType::from_magic(b"zzzzzzzzzz"), FileType::Unknown);
    }

    /// Extension lookup, including extensions shared by several formats.
    #[test]
    fn test_from_extension() {
        assert_eq!(FileType::from_extension("fq"), &[FileType::Fastq][..]);
        assert_eq!(
            FileType::from_extension("ch"),
            &[
                FileType::AgilentChemstationFid,
                FileType::AgilentChemstationMwd
            ][..]
        );
        assert_eq!(
            FileType::from_extension("not-an-extension"),
            &[FileType::Unknown][..]
        );
    }
}