#[macro_use]
extern crate log;
extern crate structopt;
extern crate fastax;
use std::error::Error;
use std::io;
use std::path::PathBuf;
use std::process;
use itertools::Itertools;
use structopt::StructOpt;
// Top-level command-line options, parsed by structopt.
//
// NOTE: plain `//` comments are used here on purpose. structopt turns `///`
// doc comments into `--help` text, so adding them would change the program's
// output.
#[derive(StructOpt)]
pub struct Opt {
// The subcommand to execute; dispatched on in `run`.
#[structopt(subcommand)]
cmd: Command,
// `-v` / `--verbose`: `run` sets the log level to Info when this is set and
// `--debug` is not.
#[structopt(short = "v", long = "verbose")]
verbosity: bool,
// `-d` / `--debug`: `run` sets the log level to Debug; checked before
// `--verbose`, so it wins when both are given.
#[structopt(short = "d", long = "debug")]
debug: bool,
}
// Subcommands of the CLI. Each variant is handled by one arm of the `match`
// in `run`.
//
// NOTE: plain `//` comments are used here on purpose. structopt turns `///`
// doc comments into `--help` text, so adding them would change the program's
// output.
#[derive(StructOpt)]
enum Command {
// `show`: resolves `terms` with `fastax::get_nodes` and prints the nodes.
#[structopt(name = "show")]
Show {
// Search terms handed to `fastax::get_nodes`.
terms: Vec<String>,
// `-c` / `--csv`: print CSV records instead of the nodes' `Display` form.
#[structopt(short = "c", long = "csv")]
csv: bool,
},
// `lineage`: resolves `terms`, builds their lineages with
// `fastax::make_lineages` and prints them.
#[structopt(name = "lineage")]
Lineage {
// Search terms handed to `fastax::get_nodes`.
terms: Vec<String>,
// `-r` / `--ranks`: hide nodes whose rank is the string "no rank".
#[structopt(short = "r", long = "ranks")]
ranks: bool,
// `-c` / `--csv`: print one CSV row per lineage instead of a tree drawing.
#[structopt(short = "c", long = "csv")]
csv: bool,
},
// `populate`: fills the local database, either from `taxdmp` or through
// `fastax::populate_db`.
#[structopt(name = "populate")]
Populate {
// `-e` / `--email`: forwarded to `fastax::populate_db` when `--taxdmp` is
// absent. NOTE(review): presumably identifies the user to the remote
// server during download -- confirm against the library.
#[structopt(short = "e", long = "email", default_value="plop@example.com")]
email: String,
// `--taxdmp`: when given, `db.populate` is called with this path and
// `fastax::populate_db` is not used.
#[structopt(long = "taxdmp")]
taxdmp: Option<PathBuf>
},
// `tree`: resolves `terms` and prints the tree built by `fastax::make_tree`.
#[structopt(name = "tree")]
Tree {
// Search terms handed to `fastax::get_nodes`.
terms: Vec<String>,
// `-i` / `--internal`: when NOT set, `tree.simplify()` is called before
// printing.
#[structopt(short = "i", long = "internal")]
internal: bool,
// `-n` / `--newick`: print `tree.to_newick()` instead of the tree's
// `Display` form.
#[structopt(short = "n", long = "newick")]
newick: bool,
// `-f` / `--format`: format string passed to `tree.set_format_string`.
// When absent and `--newick` is set, "%name" is used.
#[structopt(short = "f", long = "format")]
format: Option<String>,
},
// `subtree`: resolves a single `term` with `fastax::get_node` and prints the
// tree built by `fastax::make_subtree` from that node.
#[structopt(name = "subtree")]
SubTree {
// Search term identifying the root of the subtree.
term: String,
// `-s` / `--species`: forwarded unchanged to `fastax::make_subtree`; its
// exact effect is defined by the library, not by this file.
#[structopt(short = "s", long = "species")]
species: bool,
// `-i` / `--internal`: when NOT set, `tree.simplify()` is called before
// printing.
#[structopt(short = "i", long = "internal")]
internal: bool,
// `-n` / `--newick`: print `tree.to_newick()` instead of the tree's
// `Display` form.
#[structopt(short = "n", long = "newick")]
newick: bool,
// `-f` / `--format`: format string passed to `tree.set_format_string`.
// When absent and `--newick` is set, "%name" is used.
#[structopt(short = "f", long = "format")]
format: Option<String>,
},
// `lca`: resolves `terms` and prints `fastax::get_lca` for every pair of
// resolved nodes.
#[structopt(name = "lca")]
LCA {
// Search terms handed to `fastax::get_nodes`.
terms: Vec<String>,
// `-c` / `--csv`: print CSV records (with a header) instead of
// "LCA(a, b) = c" lines.
#[structopt(short = "c", long = "csv")]
csv: bool,
},
}
/// Prints the given taxonomy nodes on standard output.
///
/// When `csv` is `false`, every node is printed on its own line using its
/// `Display` implementation. When `csv` is `true`, a header row is written
/// followed by one record per node holding the taxid, the first
/// "scientific name" entry, the rank, the division and both genetic codes.
///
/// # Errors
///
/// Returns any error produced while writing or flushing the CSV output.
///
/// # Panics
///
/// In CSV mode, panics if a node has no "scientific name" entry or if that
/// entry is empty.
fn show(nodes: Vec<fastax::Node>, csv: bool) -> Result<(), Box<dyn Error>> {
    // Plain-text mode: nothing can fail, so handle it first and leave.
    if !csv {
        for entry in &nodes {
            println!("{}", entry);
        }
        return Ok(());
    }

    let header = [
        "taxid",
        "scientific_name",
        "rank",
        "division",
        "genetic_code",
        "mitochondrial_genetic_code",
    ];
    let mut writer = csv::Writer::from_writer(io::stdout());
    writer.write_record(&header)?;

    for entry in &nodes {
        let scientific_name = &entry.names.get("scientific name").unwrap()[0];
        let record = (
            entry.tax_id,
            scientific_name,
            &entry.rank,
            &entry.division,
            &entry.genetic_code,
            &entry.mito_genetic_code,
        );
        writer.serialize(record)?;
    }

    writer.flush()?;
    Ok(())
}
/// Prints one lineage per input, either as CSV rows or as an indented tree.
///
/// * `lineages` - the lineages to print, each an ordered list of nodes.
/// * `ranks` - when `true`, nodes whose rank is `"no rank"` are left out.
/// * `csv` - when `true`, each lineage becomes one CSV row whose fields are
///   `rank:scientific name:taxid`; rows may have different lengths.
///
/// In tree mode the first node of every lineage is shown as the literal
/// `root` and the remaining nodes are drawn below it, one level deeper each.
/// NOTE(review): this assumes every lineage starts at the taxonomy root, as
/// the hard-coded `root` label implies -- confirm against
/// `fastax::make_lineages`.
///
/// # Errors
///
/// Returns any error produced while writing or flushing the CSV output.
///
/// # Panics
///
/// Panics if a printed node has no "scientific name" entry or if that entry
/// is empty.
fn show_lineages(lineages: Vec<Vec<fastax::Node>>, ranks: bool, csv: bool) -> Result<(), Box<dyn Error>> {
    if csv {
        // Lineages differ in depth, so the writer must accept ragged rows.
        let mut wtr = csv::WriterBuilder::new()
            .flexible(true)
            .from_writer(io::stdout());
        for lineage in lineages {
            let row = lineage.iter()
                .filter(|node| !ranks || node.rank != "no rank")
                .map(|node| format!("{}:{}:{}",
                                    &node.rank,
                                    &node.names.get("scientific name").unwrap()[0],
                                    node.tax_id))
                .collect::<Vec<String>>();
            wtr.serialize(row)?;
        }
        wtr.flush()?;
    } else {
        for lineage in lineages {
            // Consume the leading node BEFORE applying the rank filter: it is
            // the one displayed as "root". Filtering first would make the
            // first *ranked* ancestor take the "root" slot and vanish from
            // the output whenever `ranks` is set.
            let mut below_root = lineage.iter();
            if below_root.next().is_none() {
                continue;
            }
            println!("root");

            let labels = below_root
                .filter(|node| !ranks || node.rank != "no rank")
                .map(|node| format!("{}: {} (taxid: {})",
                                    &node.rank,
                                    &node.names.get("scientific name").unwrap()[0],
                                    node.tax_id))
                .collect::<Vec<String>>();

            for (i, label) in labels.iter().enumerate() {
                // The last entry closes the branch; every other one forks.
                let connector = if i + 1 == labels.len() {
                    "\u{2514}\u{2500}\u{2500}"
                } else {
                    "\u{2514}\u{252C}\u{2500}"
                };
                // The first entry below the root is indented by two columns,
                // then one more column per level.
                println!("{}{} {}", " ".repeat(i + 2), connector, label);
            }
        }
    }
    Ok(())
}
/// Prints a taxonomy tree on standard output.
///
/// * `tree` - the tree to print; it is modified before printing.
/// * `internal` - when `false`, `tree.simplify()` is called first.
/// * `newick` - when `true`, the result of `tree.to_newick()` is printed;
///   otherwise the tree's `Display` form is printed.
/// * `format` - format string handed to `tree.set_format_string`. When it is
///   `None` and `newick` is set, `"%name"` is used; when it is `None` and
///   `newick` is not set, the tree's format string is left untouched.
///
/// This function currently always returns `Ok(())`.
fn show_tree(mut tree: fastax::tree::Tree, internal: bool, newick: bool, format: Option<String>) -> Result<(), Box<dyn Error>> {
    // An explicit format always wins; Newick output otherwise falls back to
    // bare names.
    match (format, newick) {
        (Some(user_format), _) => {
            tree.set_format_string(user_format);
        }
        (None, true) => {
            tree.set_format_string(String::from("%name"));
        }
        (None, false) => {}
    }

    let keep_internal = internal;
    if !keep_internal {
        tree.simplify();
    }

    if !newick {
        println!("{}", tree);
    } else {
        println!("{}", tree.to_newick());
    }

    Ok(())
}
/// Prints lowest-common-ancestor results on standard output.
///
/// Each element of `lcas` is `[node1, node2, lca]`. When `csv` is `true`, a
/// header row is written followed by one record per element with the first
/// "scientific name" entry and the taxid of each of the three nodes. When
/// `csv` is `false`, one `LCA(name1, name2) = lca_name` line is printed per
/// element.
///
/// # Errors
///
/// Returns any error produced while writing or flushing the CSV output.
///
/// # Panics
///
/// Panics if any node has no "scientific name" entry or if that entry is
/// empty.
fn show_lcas(lcas: Vec<[fastax::Node; 3]>, csv: bool) -> Result<(), Box<dyn Error>> {
    // The writer is built (and flushed at the end) in both modes; in
    // plain-text mode nothing is ever written through it.
    let mut writer = csv::WriterBuilder::new().from_writer(io::stdout());

    if csv {
        let header = ["name1", "taxid1", "name2", "taxid2", "lca_name", "lca_taxid"];
        writer.write_record(&header)?;
    }

    for [first, second, ancestor] in lcas {
        let first_name = &first.names.get("scientific name").unwrap()[0];
        let second_name = &second.names.get("scientific name").unwrap()[0];
        let ancestor_name = &ancestor.names.get("scientific name").unwrap()[0];

        if !csv {
            println!("LCA({}, {}) = {}", first_name, second_name, ancestor_name);
            continue;
        }

        let first_id = first.tax_id.to_string();
        let second_id = second.tax_id.to_string();
        let ancestor_id = ancestor.tax_id.to_string();
        writer.write_record(&[
            first_name, &first_id,
            second_name, &second_id,
            ancestor_name, &ancestor_id,
        ])?;
    }

    writer.flush()?;
    Ok(())
}
pub fn run(opt: Opt) -> Result<(), Box<dyn Error>> {
if opt.debug {
loggerv::Logger::new()
.max_level(log::Level::Debug)
.level(true)
.init()?;
} else if opt.verbosity {
loggerv::Logger::new()
.max_level(log::Level::Info)
.level(true)
.init()?;
} else {
loggerv::init_quiet()?;
}
let xdg_dirs = xdg::BaseDirectories::with_prefix("fastax")?;
let datadir = xdg_dirs.get_data_home();
xdg_dirs.create_data_directory(&datadir)?;
let dbpath = datadir.join("taxonomy.db");
let db = fastax::db::DB::new(&dbpath)?;
match opt.cmd {
Command::Populate{email, taxdmp} => {
if let Some(taxdmp) = taxdmp {
db.populate(&taxdmp)?;
} else {
fastax::populate_db(&datadir, email)?;
}
},
Command::Show{terms, csv} => {
let nodes = fastax::get_nodes(&db, &terms)?;
show(nodes, csv)?;
},
Command::Lineage{terms, ranks, csv} => {
let nodes = fastax::get_nodes(&db, &terms)?;
let lineages = fastax::make_lineages(&db, &nodes)?;
show_lineages(lineages, ranks, csv)?;
},
Command::Tree{terms, internal, newick, format} => {
let nodes = fastax::get_nodes(&db, &terms)?;
let tree = fastax::make_tree(&db, &nodes)?;
show_tree(tree, internal, newick, format)?;
},
Command::SubTree{term, species, internal, newick, format} => {
let root = fastax::get_node(&db, term)?;
let tree = fastax::make_subtree(&db, root, species)?;
show_tree(tree, internal, newick, format)?;
},
Command::LCA{terms, csv} => {
let nodes = fastax::get_nodes(&db, &terms)?;
if nodes.len() < 2 {
error!("The lca command need at least two taxa.");
}
let mut lcas: Vec<[fastax::Node; 3]> = vec![];
for pair in nodes.iter().combinations(2) {
let node1 = pair[0];
let node2 = pair[1];
let lca = fastax::get_lca(&db, &node1, &node2)?;
lcas.push([node1.clone(), node2.clone(), lca]);
}
show_lcas(lcas, csv)?;
},
}
Ok(())
}
/// Program entry point: parses the command line, runs the requested
/// subcommand and terminates the process.
///
/// Any error returned by `run` is logged at error level. An error whose
/// message contains "no such table" is reported as an uninitialised database
/// with a hint to run `fastax populate`. The process exits with
/// `exitcode::OK` on success and with status 1 on failure, so shell scripts
/// and pipelines can detect that the command failed.
fn main() {
    let opt = Opt::from_args();

    let status = match run(opt) {
        Ok(()) => exitcode::OK,
        Err(e) => {
            if e.to_string().contains("no such table") {
                error!("The database is probably not initialized.\nTry running: 'fastax populate'");
            } else {
                error!("{}", e);
            }
            // Generic failure status; previously every error still exited
            // with the success code.
            1
        }
    };

    process::exit(status);
}