use std::ffi::OsStr;
use std::path::Path;
use ahash::AHashMap as HashMap;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
/// Raw genomic input formats selectable by name.
///
/// Every variant has one canonical lowercase name. That name is what the
/// [`std::fmt::Display`] impl writes and what the [`std::str::FromStr`] impl
/// accepts, so the two round-trip.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum GenomicFmt {
    /// Written and parsed as `fastq-auto`.
    FastqAuto,
    /// Written and parsed as `fastq-gzip`.
    FastqGzip,
    /// Written and parsed as `fastq`.
    Fastq,
    /// Written and parsed as `contig-auto`.
    ContigAuto,
    /// Written and parsed as `contig-gzip`.
    ContigGzip,
    /// Written and parsed as `contig`.
    Contig,
    /// Written and parsed as `maf`.
    Maf,
}

impl std::fmt::Display for GenomicFmt {
    /// Writes the canonical lowercase name of the format.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            GenomicFmt::FastqAuto => "fastq-auto",
            GenomicFmt::FastqGzip => "fastq-gzip",
            GenomicFmt::Fastq => "fastq",
            GenomicFmt::ContigAuto => "contig-auto",
            GenomicFmt::ContigGzip => "contig-gzip",
            GenomicFmt::Contig => "contig",
            GenomicFmt::Maf => "maf",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for GenomicFmt {
    type Err = String;

    /// Parses a canonical format name (exact, case-sensitive match).
    ///
    /// # Errors
    /// Returns a message naming the rejected input when it matches no format.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "fastq-auto" => GenomicFmt::FastqAuto,
            "fastq-gzip" => GenomicFmt::FastqGzip,
            "fastq" => GenomicFmt::Fastq,
            "contig-auto" => GenomicFmt::ContigAuto,
            "contig-gzip" => GenomicFmt::ContigGzip,
            "contig" => GenomicFmt::Contig,
            "maf" => GenomicFmt::Maf,
            _ => return Err(format!("{} is not a valid format", s)),
        };
        Ok(parsed)
    }
}
/// Input format selector for raw sequence reads.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SeqReadFmt {
    /// Written and parsed as `auto`.
    Auto,
    /// Written and parsed as `fastq`.
    Fastq,
    /// Written and parsed as `gzip`.
    Gzip,
}

impl std::fmt::Display for SeqReadFmt {
    /// Writes the lowercase name of the format.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            SeqReadFmt::Auto => "auto",
            SeqReadFmt::Fastq => "fastq",
            SeqReadFmt::Gzip => "gzip",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for SeqReadFmt {
    type Err = String;

    /// Parses `auto`, `fastq` or `gzip` (exact, case-sensitive match).
    ///
    /// # Errors
    /// Returns a message naming the rejected input otherwise.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "auto" => SeqReadFmt::Auto,
            "fastq" => SeqReadFmt::Fastq,
            "gzip" => SeqReadFmt::Gzip,
            _ => return Err(format!("{} is not a valid format", s)),
        };
        Ok(parsed)
    }
}

/// Infers the sequence-read format from the file extension of `input`.
///
/// `fq` and `fastq` map to [`SeqReadFmt::Fastq`]; `gz` and `gzip` map to
/// [`SeqReadFmt::Gzip`]. Only the final extension is inspected and the
/// comparison is case-sensitive.
///
/// # Panics
/// Panics when the path has no extension, when the extension is not valid
/// UTF-8, or when the extension is not one of those listed above.
pub fn infer_raw_input_auto(input: &Path) -> SeqReadFmt {
    let extension = input.extension().and_then(OsStr::to_str);
    match extension.expect("Failed parsing extension") {
        "fq" | "fastq" => SeqReadFmt::Fastq,
        "gz" | "gzip" => SeqReadFmt::Gzip,
        _ => panic!(
            "The program cannot recognize the file extension. \
            Try to specify the input format."
        ),
    }
}
/// Input format selector for contig files.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ContigFmt {
    /// Written and parsed as `auto`.
    Auto,
    /// Written and parsed as `fasta`.
    Fasta,
    /// Written and parsed as `gzip`.
    Gzip,
}

impl std::fmt::Display for ContigFmt {
    /// Writes the lowercase name of the format.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            ContigFmt::Auto => "auto",
            ContigFmt::Fasta => "fasta",
            ContigFmt::Gzip => "gzip",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for ContigFmt {
    type Err = String;

    /// Parses `auto`, `fasta` or `gzip` (exact, case-sensitive match).
    ///
    /// # Errors
    /// Returns a message naming the rejected input otherwise.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "auto" => ContigFmt::Auto,
            "fasta" => ContigFmt::Fasta,
            "gzip" => ContigFmt::Gzip,
            _ => return Err(format!("{} is not a valid format", s)),
        };
        Ok(parsed)
    }
}

/// Infers the contig format from the file extension of `input`.
///
/// `fa`, `fasta`, `fna`, `fsa` and `fas` map to [`ContigFmt::Fasta`]; `gz`
/// and `gzip` map to [`ContigFmt::Gzip`]. Only the final extension is
/// inspected and the comparison is case-sensitive.
///
/// # Panics
/// Panics when the path has no extension, when the extension is not valid
/// UTF-8, or when the extension is not one of those listed above.
pub fn infer_contig_fmt_auto(input: &Path) -> ContigFmt {
    let extension = input.extension().and_then(OsStr::to_str);
    match extension.expect("Failed parsing extension") {
        "fa" | "fasta" | "fna" | "fsa" | "fas" => ContigFmt::Fasta,
        "gz" | "gzip" => ContigFmt::Gzip,
        _ => panic!(
            "The program cannot recognize the file extension. \
            Try to specify the input format."
        ),
    }
}
/// Input format selector for sequence files.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum InputFmt {
    /// Written and parsed as `auto`.
    Auto,
    /// Written and parsed as `fasta`.
    Fasta,
    /// Written and parsed as `nexus`.
    Nexus,
    /// Written and parsed as `phylip`.
    Phylip,
}

impl std::fmt::Display for InputFmt {
    /// Writes the lowercase name of the format.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            InputFmt::Auto => "auto",
            InputFmt::Fasta => "fasta",
            InputFmt::Nexus => "nexus",
            InputFmt::Phylip => "phylip",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for InputFmt {
    type Err = String;

    /// Parses `auto`, `fasta`, `nexus` or `phylip` (exact, case-sensitive).
    ///
    /// # Errors
    /// Returns a message naming the rejected input otherwise.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "auto" => InputFmt::Auto,
            "fasta" => InputFmt::Fasta,
            "nexus" => InputFmt::Nexus,
            "phylip" => InputFmt::Phylip,
            _ => return Err(format!("{} is not a valid format", s)),
        };
        Ok(parsed)
    }
}

/// Infers the sequence file format from the file extension of `input`.
///
/// * `fa`, `fasta`, `fna`, `fsa`, `fas` map to [`InputFmt::Fasta`]
/// * `nex`, `nxs`, `nexus` map to [`InputFmt::Nexus`]
/// * `phy`, `phylip`, `ph` map to [`InputFmt::Phylip`]
///
/// The comparison is case-sensitive. This function never returns
/// [`InputFmt::Auto`].
///
/// # Panics
/// Panics when the path has no extension, when the extension is not valid
/// UTF-8, or when the extension is not one of those listed above.
pub fn infer_input_auto(input: &Path) -> InputFmt {
    let extension = input.extension().and_then(OsStr::to_str);
    match extension.expect("Failed parsing extension") {
        "fa" | "fasta" | "fna" | "fsa" | "fas" => InputFmt::Fasta,
        "nex" | "nxs" | "nexus" => InputFmt::Nexus,
        "phy" | "phylip" | "ph" => InputFmt::Phylip,
        _ => panic!(
            "The program cannot recognize the file extension. \
            Try to specify the input format."
        ),
    }
}
/// Output format selector for sequence files.
///
/// The text written by [`std::fmt::Display`] and the text accepted by
/// [`std::str::FromStr`] differ for the interleaved variants (for example
/// `Interleaved fasta` versus `fasta-int`), so displaying and re-parsing is
/// not a round trip for those variants.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum OutputFmt {
    /// Displayed and parsed as `fasta`.
    Fasta,
    /// Displayed and parsed as `nexus`.
    Nexus,
    /// Displayed and parsed as `phylip`.
    Phylip,
    /// Displayed as `Interleaved fasta`, parsed from `fasta-int`.
    FastaInt,
    /// Displayed as `Interleaved nexus`, parsed from `nexus-int`.
    NexusInt,
    /// Displayed as `Interleaved phylip`, parsed from `phylip-int`.
    PhylipInt,
}

impl std::fmt::Display for OutputFmt {
    /// Writes a human-readable label for the format.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match *self {
            OutputFmt::Fasta => "fasta",
            OutputFmt::Nexus => "nexus",
            OutputFmt::Phylip => "phylip",
            OutputFmt::FastaInt => "Interleaved fasta",
            OutputFmt::NexusInt => "Interleaved nexus",
            OutputFmt::PhylipInt => "Interleaved phylip",
        };
        f.write_str(label)
    }
}

impl std::str::FromStr for OutputFmt {
    type Err = String;

    /// Parses an output format keyword (exact, case-sensitive match).
    ///
    /// # Errors
    /// Returns a message naming the rejected input when it matches no keyword.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "fasta" => OutputFmt::Fasta,
            "nexus" => OutputFmt::Nexus,
            "phylip" => OutputFmt::Phylip,
            "fasta-int" => OutputFmt::FastaInt,
            "nexus-int" => OutputFmt::NexusInt,
            "phylip-int" => OutputFmt::PhylipInt,
            _ => return Err(format!("{} is not a valid format", s)),
        };
        Ok(parsed)
    }
}
/// Partition file format selector.
///
/// Every variant has one canonical lowercase name that is both displayed and
/// parsed, so the two round-trip.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum PartitionFmt {
    /// Written and parsed as `charset`.
    Charset,
    /// Written and parsed as `charset-codon`.
    CharsetCodon,
    /// Written and parsed as `nexus`.
    Nexus,
    /// Written and parsed as `nexus-codon`.
    NexusCodon,
    /// Written and parsed as `raxml`.
    Raxml,
    /// Written and parsed as `raxml-codon`.
    RaxmlCodon,
}

impl std::fmt::Display for PartitionFmt {
    /// Writes the canonical lowercase name of the format.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            PartitionFmt::Charset => "charset",
            PartitionFmt::CharsetCodon => "charset-codon",
            PartitionFmt::Nexus => "nexus",
            PartitionFmt::NexusCodon => "nexus-codon",
            PartitionFmt::Raxml => "raxml",
            PartitionFmt::RaxmlCodon => "raxml-codon",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for PartitionFmt {
    type Err = String;

    /// Parses a canonical partition format name (exact, case-sensitive).
    ///
    /// # Errors
    /// Returns a message naming the rejected input when it matches no format.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "charset" => PartitionFmt::Charset,
            "charset-codon" => PartitionFmt::CharsetCodon,
            "nexus" => PartitionFmt::Nexus,
            "nexus-codon" => PartitionFmt::NexusCodon,
            "raxml" => PartitionFmt::Raxml,
            "raxml-codon" => PartitionFmt::RaxmlCodon,
            _ => return Err(format!("{} is not a valid format", s)),
        };
        Ok(parsed)
    }
}
/// Strand of a DNA sequence, written as one of the symbols `+`, `-` or `.`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DnaStrand {
    /// Symbol `+`.
    Forward,
    /// Symbol `-`.
    Reverse,
    /// Symbol `.`; also the default value.
    Missing,
}

impl Default for DnaStrand {
    /// Returns [`DnaStrand::Missing`].
    fn default() -> Self {
        DnaStrand::Missing
    }
}

impl std::fmt::Display for DnaStrand {
    /// Writes the one-character strand symbol.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let symbol = match *self {
            DnaStrand::Forward => "+",
            DnaStrand::Reverse => "-",
            DnaStrand::Missing => ".",
        };
        f.write_str(symbol)
    }
}

impl std::str::FromStr for DnaStrand {
    type Err = String;

    /// Parses `"+"`, `"-"` or `"."`.
    ///
    /// # Errors
    /// Returns a message naming the rejected input for any other string.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let strand = match s {
            "+" => DnaStrand::Forward,
            "-" => DnaStrand::Reverse,
            "." => DnaStrand::Missing,
            _ => return Err(format!("{} is not a valid DNA strand", s)),
        };
        Ok(strand)
    }
}

impl DnaStrand {
    /// Converts a strand symbol character into a [`DnaStrand`].
    ///
    /// # Panics
    /// Panics when `c` is not `'+'`, `'-'` or `'.'`.
    pub fn from_char(c: char) -> Self {
        match c {
            '+' => Self::Forward,
            '-' => Self::Reverse,
            '.' => Self::Missing,
            _ => panic!("Invalid DNA strand"),
        }
    }

    /// Returns the strand symbol as a character.
    pub fn to_char(&self) -> char {
        match *self {
            Self::Forward => '+',
            Self::Reverse => '-',
            Self::Missing => '.',
        }
    }
}
impl Serialize for DnaStrand {
    /// Serializes the strand as its one-character string symbol:
    /// `"+"`, `"-"` or `"."`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let symbol = match self {
            DnaStrand::Forward => "+",
            DnaStrand::Reverse => "-",
            DnaStrand::Missing => ".",
        };
        serializer.serialize_str(symbol)
    }
}
impl<'de> Deserialize<'de> for DnaStrand {
    /// Deserializes a strand from its one-character string symbol.
    ///
    /// Accepts `"+"`, `"-"` and `"."`. The `"."` symbol must be accepted
    /// because the `Serialize` impl for this type emits it for
    /// `DnaStrand::Missing`; rejecting it would make serialized data
    /// containing a missing strand impossible to read back.
    ///
    /// # Errors
    /// Returns a custom deserialization error for any other string, and
    /// propagates the deserializer's own error when the value is not a string.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let s = String::deserialize(deserializer)?;
        match s.as_str() {
            "+" => Ok(DnaStrand::Forward),
            "-" => Ok(DnaStrand::Reverse),
            "." => Ok(DnaStrand::Missing),
            _ => Err(serde::de::Error::custom("Invalid DNA strand")),
        }
    }
}
/// Sequence data type selector.
///
/// The keywords accepted by [`std::str::FromStr`] (`dna`, `aa`, `ignore`)
/// differ from the labels written by [`std::fmt::Display`].
#[derive(PartialEq)]
pub enum DataType {
    /// Parsed from `dna`, displayed as `DNA`.
    Dna,
    /// Parsed from `aa`, displayed as `Amino acid`.
    Aa,
    /// Parsed from `ignore`, displayed as `Ignore Data Type`.
    Ignore,
}

impl std::fmt::Display for DataType {
    /// Writes a human-readable label for the data type.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match *self {
            DataType::Dna => "DNA",
            DataType::Aa => "Amino acid",
            DataType::Ignore => "Ignore Data Type",
        };
        f.write_str(label)
    }
}

impl std::str::FromStr for DataType {
    type Err = String;

    /// Parses `dna`, `aa` or `ignore` (exact, case-sensitive match).
    ///
    /// # Errors
    /// Returns a message naming the rejected input otherwise.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "dna" => DataType::Dna,
            "aa" => DataType::Aa,
            "ignore" => DataType::Ignore,
            _ => return Err(format!("{} is not a valid data type", s)),
        };
        Ok(parsed)
    }
}
/// Insertion-ordered map from `String` keys to `String` values holding a
/// sequence matrix.
///
/// `Header::update` treats the number of entries as the taxon count and the
/// length of the first value as the character count.
/// NOTE(review): keys are presumably taxon/sequence IDs and values the
/// sequence text — confirm against the code that fills the map.
pub type SeqMatrix = IndexMap<String, String>;
/// A named range described by a start and an end position.
///
/// NOTE(review): whether `start`/`end` are 0- or 1-based, and whether `end`
/// is inclusive, is not determined in this file — confirm at the call sites.
pub struct Partition {
    /// Name of the gene/locus this range belongs to.
    pub gene: String,
    /// Start position of the range.
    pub start: usize,
    /// End position of the range.
    pub end: usize,
}

impl Default for Partition {
    /// Builds an empty partition: empty gene name, `start` and `end` at zero.
    fn default() -> Self {
        Partition {
            gene: String::new(),
            start: 0,
            end: 0,
        }
    }
}

impl Partition {
    /// Creates an empty partition; identical to [`Partition::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
/// Summary metadata describing a sequence matrix.
///
/// NOTE(review): the field set resembles a NEXUS-style header — confirm
/// against the readers/writers that consume it.
#[derive(Clone, Debug)]
pub struct Header {
    /// Number of entries (taxa) in the matrix.
    pub ntax: usize,
    /// Length of the first sequence in the matrix; zero for an empty matrix.
    pub nchar: usize,
    /// Data type label; `"dna"` for a fresh header.
    pub datatype: String,
    /// Symbol used for missing data; `'?'` for a fresh header.
    pub missing: char,
    /// Symbol used for gaps; `'-'` for a fresh header.
    pub gap: char,
    /// Whether the matrix is flagged as aligned; `false` for a fresh header.
    pub aligned: bool,
}

impl Default for Header {
    /// Same as [`Header::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Header {
    /// Creates a header with zero counts, datatype `"dna"`, missing symbol
    /// `'?'`, gap symbol `'-'` and `aligned` set to `false`.
    pub fn new() -> Self {
        Self {
            ntax: 0,
            nchar: 0,
            datatype: String::from("dna"),
            missing: '?',
            gap: '-',
            aligned: false,
        }
    }

    /// Refreshes `ntax` and `nchar` from `matrix`.
    ///
    /// `ntax` becomes the number of entries and `nchar` the length of the
    /// first sequence. An empty matrix yields `nchar == 0` instead of
    /// panicking. Other fields are left untouched.
    pub fn update(&mut self, matrix: &SeqMatrix) {
        self.ntax = matrix.len();
        // An empty matrix has no first sequence to measure, so fall back to 0.
        self.nchar = matrix.values().next().map_or(0, |seq| seq.len());
    }

    /// Refreshes `ntax` and `nchar` from `matrix` (see [`Header::update`])
    /// and records the `aligned` flag.
    pub fn from_seq_matrix(&mut self, matrix: &SeqMatrix, aligned: bool) {
        self.update(matrix);
        self.aligned = aligned;
    }
}
/// Per-taxon counters.
///
/// NOTE(review): the code that fills these counters is not part of this
/// file; the field descriptions below follow the field names — confirm
/// against the summary code that updates them.
pub struct TaxonRecords {
    /// Occurrence count per character.
    pub char_counts: HashMap<char, usize>,
    /// Number of loci counted for the taxon.
    pub locus_counts: usize,
    /// Total number of characters counted.
    pub total_chars: usize,
    /// Count of G/C characters.
    pub gc_count: usize,
    /// Count of A/T characters.
    pub at_count: usize,
    /// Count of nucleotide characters.
    pub nucleotides: usize,
    /// Count of missing-data characters.
    pub missing_data: usize,
}

impl Default for TaxonRecords {
    /// Builds a record with an empty character map and all counters at zero.
    fn default() -> Self {
        TaxonRecords {
            char_counts: HashMap::new(),
            locus_counts: 0,
            total_chars: 0,
            gc_count: 0,
            at_count: 0,
            nucleotides: 0,
            missing_data: 0,
        }
    }
}

impl TaxonRecords {
    /// Creates an empty record; identical to [`TaxonRecords::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
/// Level of detail selector for summaries.
///
/// Every variant has one lowercase name that is both displayed and parsed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum SummaryMode {
    /// Written and parsed as `minimal`.
    Minimal,
    /// Written and parsed as `default`.
    Default,
    /// Written and parsed as `complete`.
    Complete,
}

impl std::fmt::Display for SummaryMode {
    /// Writes the lowercase name of the mode.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            SummaryMode::Minimal => "minimal",
            SummaryMode::Default => "default",
            SummaryMode::Complete => "complete",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for SummaryMode {
    type Err = String;

    /// Parses `minimal`, `default` or `complete` (exact, case-sensitive).
    ///
    /// # Errors
    /// Returns a message naming the rejected input otherwise.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mode = match s {
            "minimal" => SummaryMode::Minimal,
            "default" => SummaryMode::Default,
            "complete" => SummaryMode::Complete,
            _ => return Err(format!("{} is not a valid summary mode", s)),
        };
        Ok(mode)
    }
}
/// Output target selector for summary results.
///
/// NOTE(review): how each variant is consumed is not visible in this file.
/// The names suggest standard output, CSV, TSV and JSON — confirm at the
/// call sites. The type derives no traits, so values can only be matched on.
pub enum SummaryOutput {
Stdout,
Csv,
Tsv,
Json,
}
/// Genetic code (translation table) selector.
///
/// [`std::str::FromStr`] accepts a numeric identifier given as a string and
/// [`std::fmt::Display`] writes a descriptive name, so displaying and
/// re-parsing is not a round trip.
/// NOTE(review): the identifiers appear to follow the NCBI translation table
/// numbering — confirm against the translation tables used by the project.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum GeneticCodes {
    /// Parsed from `1`.
    StandardCode,
    /// Parsed from `2`.
    VertMtDna,
    /// Parsed from `3`.
    YeastMtDna,
    /// Parsed from `4`.
    MoldProtCoelMtDna,
    /// Parsed from `5`.
    InvertMtDna,
    /// Parsed from `6`.
    CilDasHexNu,
    /// Parsed from `9`.
    EchiFlatwormMtDna,
    /// Parsed from `10`.
    EuplotidNu,
    /// Parsed from `11`.
    BacArchPlantPlast,
    /// Parsed from `12`.
    AltYeastNu,
    /// Parsed from `13`.
    AsciMtDna,
    /// Parsed from `14`.
    AltFlatwormMtDna,
    /// Parsed from `16`.
    ChlorMtDna,
    /// Parsed from `21`.
    TrematodeMtDna,
    /// Parsed from `22`.
    ScenedesmusMtDna,
    /// Parsed from `23`.
    ThrausMtDna,
    /// Parsed from `24`.
    RhabdopMtDna,
    /// Parsed from `25`.
    CaDivSR1GraciBac,
    /// Parsed from `26`.
    PachyNu,
    /// Parsed from `29`.
    MesodiniumNu,
    /// Parsed from `30`.
    PeritrichNu,
    /// Parsed from `33`.
    CephalodiscidaeMtDna,
}

impl std::fmt::Display for GeneticCodes {
    /// Writes the descriptive name of the genetic code.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let description = match *self {
            GeneticCodes::StandardCode => "Standard Code",
            GeneticCodes::VertMtDna => "Vertebrate Mitochondrial DNA",
            GeneticCodes::YeastMtDna => "Yeast Mitochondrial DNA",
            GeneticCodes::MoldProtCoelMtDna => "Mold, Protozoan, and Coelenterate Mitochondrial DNA and the Mycoplasma/Spiroplasma Code",
            GeneticCodes::InvertMtDna => "Invertebrate Mitochondrial DNA",
            GeneticCodes::CilDasHexNu => "Ciliate, Dasycladacean and Hexamita Nuclear Code",
            GeneticCodes::EchiFlatwormMtDna => "Echinoderm and Flatworm Mitochondrial DNA",
            GeneticCodes::EuplotidNu => "Euplotid Nuclear Code",
            GeneticCodes::BacArchPlantPlast => "Bacterial, Archaeal and Plant Plastid Code",
            GeneticCodes::AltYeastNu => "Alternative Yeast Nuclear Code",
            GeneticCodes::AsciMtDna => "Ascidian Mitochondrial DNA",
            GeneticCodes::AltFlatwormMtDna => "Alternative Flatworm Mitochondrial DNA",
            GeneticCodes::ChlorMtDna => "Chlorophycean Mitochondrial DNA",
            GeneticCodes::TrematodeMtDna => "Trematode Mitochondrial DNA",
            GeneticCodes::ScenedesmusMtDna => "Scenedesmus obliquus Mitochondrial DNA",
            GeneticCodes::ThrausMtDna => "Thraustochytrium Mitochondrial DNA",
            GeneticCodes::RhabdopMtDna => "Rhabdopleuridae Mitochondrial DNA",
            GeneticCodes::CaDivSR1GraciBac => "Candidate Division SR1 and Gracilibacteria",
            GeneticCodes::PachyNu => "Pachysolen tannophilus Nuclear Code",
            GeneticCodes::MesodiniumNu => "Mesodinium Nuclear Code",
            GeneticCodes::PeritrichNu => "Peritrich Nuclear Code",
            GeneticCodes::CephalodiscidaeMtDna => "Cephalodiscidae Mitochondrial DNA",
        };
        f.write_str(description)
    }
}

impl std::str::FromStr for GeneticCodes {
    type Err = String;

    /// Parses a genetic code from its numeric identifier given as a string.
    ///
    /// The match is exact: no whitespace trimming and no leading zeros.
    ///
    /// # Errors
    /// Returns a message naming the rejected input when it is not one of the
    /// supported identifiers.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let code = match s {
            "1" => GeneticCodes::StandardCode,
            "2" => GeneticCodes::VertMtDna,
            "3" => GeneticCodes::YeastMtDna,
            "4" => GeneticCodes::MoldProtCoelMtDna,
            "5" => GeneticCodes::InvertMtDna,
            "6" => GeneticCodes::CilDasHexNu,
            "9" => GeneticCodes::EchiFlatwormMtDna,
            "10" => GeneticCodes::EuplotidNu,
            "11" => GeneticCodes::BacArchPlantPlast,
            "12" => GeneticCodes::AltYeastNu,
            "13" => GeneticCodes::AsciMtDna,
            "14" => GeneticCodes::AltFlatwormMtDna,
            "16" => GeneticCodes::ChlorMtDna,
            "21" => GeneticCodes::TrematodeMtDna,
            "22" => GeneticCodes::ScenedesmusMtDna,
            "23" => GeneticCodes::ThrausMtDna,
            "24" => GeneticCodes::RhabdopMtDna,
            "25" => GeneticCodes::CaDivSR1GraciBac,
            "26" => GeneticCodes::PachyNu,
            "29" => GeneticCodes::MesodiniumNu,
            "30" => GeneticCodes::PeritrichNu,
            "33" => GeneticCodes::CephalodiscidaeMtDna,
            _ => return Err(format!("{} is not a valid genetic code", s)),
        };
        Ok(code)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A path ending in `.nex` must be detected as NEXUS input.
    #[test]
    fn test_parsing_input_fmt() {
        let detected = infer_input_auto(Path::new("tests/files/simple.nex"));
        assert_eq!(InputFmt::Nexus, detected);
    }
}