use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::io::Cursor;
use std::ops::Deref;
use std::str::FromStr;
use pyo3::basic::CompareOp;
use pyo3::create_exception;
use pyo3::exceptions::PyKeyError;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyDict, PyList, PyType};
use serde_json::Value;
use crate::base::InternalIndex;
use crate::json::JsonFormat;
use crate::rank::TaxRank;
use crate::Taxonomy as TaxonomyTrait;
use crate::{gtdb, json, ncbi, newick, phyloxml, prune_away, prune_to, GeneralTaxonomy};
// Declares the Python exception `TaxonomyError` in the `taxonomy` module,
// deriving from Python's base `Exception`. It is the error type raised by
// the `py_try!` macro and by the explicit `PyErr::new::<TaxonomyError, _>` calls
// in this file.
create_exception!(taxonomy, TaxonomyError, pyo3::exceptions::PyException);
// Unwraps a `Result`, returning early from the enclosing function with a
// `TaxonomyError` on failure (the expansion ends in `?`).
//
// * `py_try!(call)`      - the error's `Display` text becomes the message.
// * `py_try!(call, msg)` - the original error is discarded and `msg` is used.
macro_rules! py_try {
    ($call:expr) => {
        $call.map_err(|err| PyErr::new::<TaxonomyError, _>(err.to_string()))?
    };
    ($call:expr, $msg:expr) => {
        $call.map_err(|_ignored| PyErr::new::<TaxonomyError, _>($msg.to_owned()))?
    };
}
fn json_value_to_pyobject(val: &Value) -> PyObject {
Python::with_gil(|py| match val {
Value::Null => py.None(),
Value::Bool(b) => b.to_object(py),
Value::Number(n) => {
if let Some(n1) = n.as_i64() {
return n1.to_object(py);
}
n.as_f64().unwrap().to_object(py)
}
Value::String(s) => s.to_object(py),
Value::Array(arr) => {
let pylist = PyList::empty(py);
for v in arr {
pylist
.append(json_value_to_pyobject(v))
.expect("can add items to list");
}
pylist.to_object(py)
}
Value::Object(obj) => {
let pydict = PyDict::new(py);
for (key, val) in obj.iter() {
pydict
.set_item(key, json_value_to_pyobject(val))
.expect("can add items to dict");
}
pydict.to_object(py)
}
})
}
/// A single taxonomy node as exposed to Python.
///
/// Instances are owned copies built by `Taxonomy::as_node`; equality is
/// derived field-by-field.
#[pyclass]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TaxonomyNode {
    /// Identifier of this node (read-only attribute in Python).
    #[pyo3(get)]
    id: String,
    /// Name of this node (read-only attribute in Python).
    #[pyo3(get)]
    name: String,
    /// Identifier of the parent node, or `None` when there is no parent.
    #[pyo3(get)]
    parent: Option<String>,
    /// Rank of this node, stored as its string form.
    #[pyo3(get)]
    rank: String,
    // Additional JSON data attached to the node. No Python getter is generated
    // for this field; values are reachable through `__getitem__`.
    extra: HashMap<String, Value>,
}
#[pymethods]
impl TaxonomyNode {
fn __hash__(&self) -> u64 {
let mut hasher = DefaultHasher::new();
self.id.hash(&mut hasher);
self.name.hash(&mut hasher);
self.parent.hash(&mut hasher);
self.rank.hash(&mut hasher);
for key in self.extra.keys() {
key.hash(&mut hasher);
}
hasher.finish()
}
fn __richcmp__(&self, other: PyRef<TaxonomyNode>, op: CompareOp) -> Py<PyAny> {
let py = other.py();
match op {
CompareOp::Eq => (self == other.deref()).into_py(py),
CompareOp::Ne => (self != other.deref()).into_py(py),
_ => py.NotImplemented(),
}
}
fn __getitem__(&self, obj: &PyAny, py: Python<'_>) -> PyResult<PyObject> {
let key: &str = obj.extract()?;
match key {
"id" => Ok(self.id.to_object(py)),
"name" => Ok(self.name.to_object(py)),
"parent" => Ok(self.parent.to_object(py)),
"rank" => Ok(self.rank.to_object(py)),
_ => {
if self.extra.contains_key(key) {
Ok(json_value_to_pyobject(self.extra.get(key).unwrap()))
} else {
return Err(PyKeyError::new_err(format!("Key {} not found", key)));
}
}
}
}
fn __repr__(&self) -> PyResult<String> {
Ok(format!(
"<TaxonomyNode (id=\"{}\" rank=\"{}\" name=\"{}\")>",
self.id, self.rank, self.name
))
}
}
/// Python-facing wrapper around a `GeneralTaxonomy`.
#[pyclass]
#[derive(Debug, Clone)]
pub struct Taxonomy {
    // The wrapped taxonomy; every method of this class delegates to it.
    tax: GeneralTaxonomy,
}
impl Taxonomy {
    /// Looks up the name stored for `tax_id`, failing with `TaxonomyError`
    /// when the underlying lookup fails.
    pub(crate) fn get_name<'t>(&'t self, tax_id: &'t str) -> PyResult<&'t str> {
        Ok(py_try!(self.tax.name(tax_id)))
    }

    /// Looks up the rank of `tax_id` and returns its string form, failing with
    /// `TaxonomyError` when the underlying lookup fails.
    pub(crate) fn get_rank(&self, tax_id: &str) -> PyResult<String> {
        let found = py_try!(self.tax.rank(tax_id));
        Ok(found.to_string())
    }

    /// Builds an owned `TaxonomyNode` for `tax_id` from its name, rank,
    /// parent id (if any) and a copy of its extra data.
    pub(crate) fn as_node(&self, tax_id: &str) -> PyResult<TaxonomyNode> {
        let node_name = self.get_name(tax_id)?;
        let node_rank = self.get_rank(tax_id)?;
        // Only the parent's id is kept; the distance is dropped here.
        let parent_id = match py_try!(self.tax.parent(tax_id)) {
            Some((pid, _distance)) => Some(pid.to_string()),
            None => None,
        };
        let data = py_try!(self.tax.data(tax_id));
        Ok(TaxonomyNode {
            id: tax_id.to_string(),
            name: node_name.to_string(),
            parent: parent_id,
            rank: node_rank,
            extra: (*data).to_owned(),
        })
    }
}
#[pymethods]
impl Taxonomy {
#[classmethod]
fn from_gtdb(_cls: &PyType, value: &str) -> PyResult<Taxonomy> {
let mut c = Cursor::new(value);
let tax = py_try!(gtdb::load(&mut c));
Ok(Taxonomy { tax })
}
#[classmethod]
fn from_json(_cls: &PyType, value: &str, json_pointer: Option<&str>) -> PyResult<Taxonomy> {
let mut c = Cursor::new(value);
let tax = py_try!(json::load(&mut c, json_pointer));
Ok(Taxonomy { tax })
}
#[classmethod]
fn from_newick(_cls: &PyType, value: &str) -> PyResult<Taxonomy> {
let mut c = Cursor::new(value);
let tax = py_try!(newick::load(&mut c));
Ok(Taxonomy { tax })
}
#[classmethod]
fn from_ncbi(_cls: &PyType, dump_dir: &str) -> PyResult<Taxonomy> {
let tax = py_try!(ncbi::load(dump_dir));
Ok(Taxonomy { tax })
}
#[classmethod]
fn from_phyloxml(_cls: &PyType, value: &str) -> PyResult<Taxonomy> {
let mut c = Cursor::new(value);
let tax = py_try!(phyloxml::load(&mut c));
Ok(Taxonomy { tax })
}
pub fn clone(&self) -> Taxonomy {
Clone::clone(self)
}
fn to_json_tree(&self, py: Python<'_>) -> PyResult<PyObject> {
let mut bytes = Vec::new();
py_try!(json::save::<_, &str, _>(
&mut bytes,
&self.tax,
JsonFormat::Tree,
None
));
Ok(PyBytes::new(py, &bytes).into())
}
fn to_json_node_links(&self, py: Python<'_>) -> PyResult<PyObject> {
let mut bytes = Vec::new();
py_try!(json::save::<_, &str, _>(
&mut bytes,
&self.tax,
JsonFormat::NodeLink,
None
));
Ok(PyBytes::new(py, &bytes).into())
}
fn to_ncbi(&self, output_dir: &str) -> PyResult<()> {
py_try!(ncbi::save::<&str, _, _>(&self.tax, output_dir));
Ok(())
}
fn to_newick(&self, py: Python<'_>) -> PyResult<PyObject> {
let mut bytes = Vec::new();
py_try!(newick::save(
&mut bytes,
&self.tax,
Some(TaxonomyTrait::<&str>::root(&self.tax))
));
Ok(PyBytes::new(py, &bytes).into())
}
/// Returns the node for `tax_id`, or `None` when it cannot be built
/// (any lookup error is turned into `None` rather than raised).
fn node(&self, tax_id: &str) -> Option<TaxonomyNode> {
    match self.as_node(tax_id) {
        Ok(found) => Some(found),
        Err(_) => None,
    }
}
/// Returns every node whose id is reported by the underlying
/// `find_all_by_name(name)` lookup.
///
/// Raises `TaxonomyError` if any reported id cannot be turned into a node.
fn find_all_by_name(&self, name: &str) -> PyResult<Vec<TaxonomyNode>> {
    let matching_ids = self.tax.find_all_by_name(name);
    matching_ids
        .into_iter()
        .map(|matched_id| self.as_node(matched_id))
        .collect::<PyResult<Vec<TaxonomyNode>>>()
}
/// Returns the parent of `tax_id` together with the distance to it.
///
/// With `at_rank`, the lookup uses `parent_at_rank` for that rank; otherwise
/// it uses `parent`. Returns `(None, None)` when the lookup fails or yields
/// no parent.
///
/// Raises `TaxonomyError` when `at_rank` cannot be parsed as a rank.
fn parent_with_distance(
    &self,
    tax_id: &str,
    at_rank: Option<&str>,
) -> PyResult<(Option<TaxonomyNode>, Option<f32>)> {
    let lookup = match at_rank {
        Some(rank_name) => match TaxRank::from_str(rank_name) {
            Ok(parsed_rank) => self.tax.parent_at_rank(tax_id, parsed_rank),
            Err(_) => {
                return Err(PyErr::new::<TaxonomyError, _>(format!(
                    "Rank {} could not be understood",
                    rank_name
                )));
            }
        },
        None => self.tax.parent(tax_id),
    };
    match lookup {
        Ok(Some((id, distance))) => Ok((self.as_node(id).ok(), Some(distance))),
        // Lookup errors and "no parent" both collapse to an empty answer.
        _ => Ok((None, None)),
    }
}
/// Same as `parent_with_distance`, but returns only the node and drops the
/// distance.
fn parent(&self, tax_id: &str, at_rank: Option<&str>) -> PyResult<Option<TaxonomyNode>> {
    self.parent_with_distance(tax_id, at_rank)
        .map(|(node, _distance)| node)
}
/// Returns the nodes reported by the underlying `children(tax_id)` lookup.
///
/// Raises `TaxonomyError` when the lookup fails or a child cannot be turned
/// into a node.
fn children(&self, tax_id: &str) -> PyResult<Vec<TaxonomyNode>> {
    let child_ids = py_try!(self.tax.children(tax_id));
    let mut nodes = Vec::new();
    for child_id in child_ids {
        nodes.push(self.as_node(child_id)?);
    }
    Ok(nodes)
}
/// Returns the nodes reported by the underlying `descendants(tax_id)` lookup.
///
/// Raises `TaxonomyError` when the lookup fails or a descendant cannot be
/// turned into a node.
fn descendants(&self, tax_id: &str) -> PyResult<Vec<TaxonomyNode>> {
    let descendant_ids = py_try!(self.tax.descendants(tax_id));
    let mut nodes = Vec::new();
    for descendant_id in descendant_ids {
        nodes.push(self.as_node(descendant_id)?);
    }
    Ok(nodes)
}
/// Returns the nodes reported by the underlying `lineage(tax_id)` lookup,
/// in the order that lookup returns them.
///
/// Raises `TaxonomyError` when the lookup fails or an entry cannot be turned
/// into a node.
fn lineage(&self, tax_id: &str) -> PyResult<Vec<TaxonomyNode>> {
    let lineage_ids = py_try!(self.tax.lineage(tax_id));
    let mut nodes = Vec::new();
    for lineage_id in lineage_ids {
        nodes.push(self.as_node(lineage_id)?);
    }
    Ok(nodes)
}
/// Returns the internal index the wrapped taxonomy uses for `tax_id`.
///
/// Raises `TaxonomyError` when the id cannot be resolved.
fn internal_index(&self, tax_id: &str) -> PyResult<usize> {
    Ok(py_try!(self.tax.to_internal_index(tax_id)))
}
/// Returns the lineage of `tax_id` without its first entry.
///
/// NOTE(review): the first lineage entry is presumably the node itself, so
/// the result is its ancestors; confirm against the `lineage` ordering.
///
/// Raises `TaxonomyError` under the same conditions as `lineage`.
fn parents(&self, tax_id: &str) -> PyResult<Vec<TaxonomyNode>> {
    let mut ancestors = self.lineage(tax_id)?;
    // `drain(..1)` panics on an empty vector; guard so an empty lineage
    // yields an empty result instead.
    if !ancestors.is_empty() {
        ancestors.remove(0);
    }
    Ok(ancestors)
}
/// Returns the node reported by the underlying `lca(id1, id2)` lookup, or
/// `None` when that id cannot be turned into a node.
///
/// Raises `TaxonomyError` when the lookup itself fails.
fn lca(&self, id1: &str, id2: &str) -> PyResult<Option<TaxonomyNode>> {
    let ancestor_id = py_try!(self.tax.lca(id1, id2));
    let ancestor = self.node(ancestor_id);
    Ok(ancestor)
}
/// Returns a pruned copy of this taxonomy; `self` is left untouched.
///
/// When `keep` is given, `prune_to` is applied first; when `remove` is given,
/// `prune_away` is then applied to that result.
///
/// Raises `TaxonomyError` when either pruning step fails.
fn prune(&self, keep: Option<Vec<&str>>, remove: Option<Vec<&str>>) -> PyResult<Taxonomy> {
    let mut pruned = self.tax.clone();
    if let Some(keep_ids) = keep {
        pruned = py_try!(prune_to(&pruned, &keep_ids, false));
    }
    if let Some(remove_ids) = remove {
        pruned = py_try!(prune_away(&pruned, &remove_ids));
    }
    Ok(Taxonomy { tax: pruned })
}
/// Removes `tax_id` from the taxonomy in place.
///
/// Raises `TaxonomyError` when the removal fails.
fn remove_node(&mut self, tax_id: &str) -> PyResult<()> {
    Ok(py_try!(self.tax.remove(tax_id)))
}
/// Adds a new node `tax_id` under `parent_id` and sets its name and rank.
///
/// Raises `TaxonomyError` when `tax_id` already exists, when `rank` cannot be
/// parsed, or when the underlying insertion fails.
fn add_node(&mut self, parent_id: &str, tax_id: &str, name: &str, rank: &str) -> PyResult<()> {
    if self.tax.to_internal_index(tax_id).is_ok() {
        return Err(PyErr::new::<TaxonomyError, _>(format!(
            "A node with tax id {} already exists",
            tax_id
        )));
    }
    // Validate the rank before touching the taxonomy: otherwise a bad rank
    // would leave a freshly inserted node behind with no name or rank.
    py_try!(TaxRank::from_str(rank), "Rank could not be understood");
    py_try!(self.tax.add(parent_id, tax_id));
    // `edit_node` already reports `TaxonomyError`, so propagate it unchanged.
    self.edit_node(tax_id, Some(name), Some(rank), None, None)
}
/// Edits the node `tax_id` in place. Each `Some` argument replaces the
/// corresponding attribute; `None` leaves it unchanged.
///
/// All arguments are validated before anything is written, so a failed call
/// leaves the taxonomy unmodified.
///
/// Raises `TaxonomyError` when:
/// * `tax_id` or `parent_id` cannot be resolved,
/// * `rank` cannot be parsed,
/// * a parent or parent distance is requested for the root,
/// * the new parent's lineage contains `tax_id` (moving the node under
///   itself or under one of its own descendants).
fn edit_node(
    &mut self,
    tax_id: &str,
    name: Option<&str>,
    rank: Option<&str>,
    parent_id: Option<&str>,
    parent_distance: Option<f32>,
) -> PyResult<()> {
    // Validation phase: no mutation happens until every check has passed.
    let idx = py_try!(self.tax.to_internal_index(tax_id));
    let is_root = tax_id == TaxonomyTrait::<&str>::root(&self.tax);

    let new_rank = match rank {
        Some(r) => Some(py_try!(
            TaxRank::from_str(r),
            "Rank could not be understood"
        )),
        None => None,
    };

    let new_parent_idx = match parent_id {
        Some(p) => {
            if is_root {
                return Err(PyErr::new::<TaxonomyError, _>("Root cannot have a parent"));
            }
            let lineage = py_try!(self.tax.lineage(p), "New parent has bad lineage?");
            if lineage.contains(&tax_id) {
                return Err(PyErr::new::<TaxonomyError, _>(
                    "Node can not be moved to its child",
                ));
            }
            Some(py_try!(self.tax.to_internal_index(p)))
        }
        None => None,
    };

    if parent_distance.is_some() && is_root {
        return Err(PyErr::new::<TaxonomyError, _>("Root cannot have a parent"));
    }

    // Apply phase.
    if let Some(parsed_rank) = new_rank {
        self.tax.ranks[idx] = parsed_rank;
    }
    if let Some(n) = name {
        self.tax.names[idx] = n.to_string();
    }
    if let Some(new_parent_idx) = new_parent_idx {
        let old_parent_idx = self.tax.parent_ids[idx];
        // A linear search avoids relying on the old child list being sorted
        // and cannot panic if the entry is unexpectedly missing.
        let old_siblings = &mut self.tax.children_lookup[old_parent_idx];
        if let Some(pos) = old_siblings.iter().position(|&child| child == idx) {
            old_siblings.remove(pos);
        }
        self.tax.parent_ids[idx] = new_parent_idx;
        let new_siblings = &mut self.tax.children_lookup[new_parent_idx];
        new_siblings.push(idx);
        new_siblings.sort_unstable();
    }
    if let Some(distance) = parent_distance {
        self.tax.parent_distances[idx] = distance;
    }
    Ok(())
}
#[getter]
fn root(&self) -> TaxonomyNode {
let key: &str = self.tax.root();
self.as_node(key).unwrap()
}
/// Short debugging representation showing the number of nodes.
fn __repr__(&self) -> PyResult<String> {
    let node_count = TaxonomyTrait::<InternalIndex>::len(&self.tax);
    Ok(format!("<Taxonomy ({} nodes)>", node_count))
}
/// Number of nodes, as reported by the wrapped taxonomy's `len`.
fn __len__(&self) -> PyResult<usize> {
    let node_count = TaxonomyTrait::<InternalIndex>::len(&self.tax);
    Ok(node_count)
}
/// `taxonomy[tax_id]`: returns the node for `tax_id`.
///
/// Raises `TaxonomyError` when the node cannot be built.
fn __getitem__(&self, tax_id: &str) -> PyResult<TaxonomyNode> {
    let found = self.as_node(tax_id)?;
    Ok(found)
}
/// `del taxonomy[tax_id]`: removes `tax_id` from the taxonomy in place.
///
/// Raises `TaxonomyError` when the removal fails.
fn __delitem__(&mut self, tax_id: &str) -> PyResult<()> {
    py_try!(self.tax.remove(tax_id));
    Ok(())
}
/// `tax_id in taxonomy`: true when `tax_id` resolves to an internal index.
fn __contains__(&self, tax_id: &str) -> PyResult<bool> {
    match self.tax.to_internal_index(tax_id) {
        Ok(_) => Ok(true),
        Err(_) => Ok(false),
    }
}
/// `iter(taxonomy)`: creates a `TaxonomyIterator` that starts at the root
/// and keeps a reference to this taxonomy.
///
/// Panics if the root id cannot be resolved to an internal index.
fn __iter__(slf: PyRefMut<Self>, py: Python<'_>) -> PyResult<TaxonomyIterator> {
    // Resolve the root inside a block so the borrow of `slf` ends before
    // `slf` is converted into a Python object below.
    let root_idx = {
        let root_id = slf.tax.root();
        slf.tax.to_internal_index(root_id).unwrap()
    };
    let nodes_left = vec![root_idx];
    let visited_nodes = Vec::new();
    Ok(TaxonomyIterator {
        t: slf.into_py(py),
        nodes_left,
        visited_nodes,
    })
}
}
/// Python iterator over the ids of a `Taxonomy`, created by `Taxonomy.__iter__`.
#[pyclass]
pub struct TaxonomyIterator {
    // The Python `Taxonomy` object being iterated; `__next__` extracts a
    // `PyRef<Taxonomy>` from it on each call.
    t: PyObject,
    // Internal indices of nodes whose children have already been pushed onto
    // `nodes_left`; its last entry is compared with the top of `nodes_left`.
    visited_nodes: Vec<usize>,
    // Stack of internal indices still to be processed; iteration ends when it
    // is empty.
    nodes_left: Vec<usize>,
}
#[pymethods]
impl TaxonomyIterator {
fn __iter__(slf: PyRef<Self>) -> PyRef<Self> {
slf
}
fn __next__(mut slf: PyRefMut<Self>, py: Python<'_>) -> PyResult<Option<String>> {
let traverse_preorder = true;
loop {
if slf.nodes_left.is_empty() {
return Ok(None);
}
let cur_node = *slf.nodes_left.last().unwrap();
let node_visited = {
let last_visited = slf.visited_nodes.last();
Some(&cur_node) == last_visited
};
let node = if node_visited {
slf.visited_nodes.pop();
slf.nodes_left.pop().unwrap() } else {
slf.visited_nodes.push(cur_node);
let tax: PyRef<Taxonomy> = slf.t.extract(py)?;
let cur_node_str = tax.tax.from_internal_index(cur_node).unwrap();
let children = tax
.tax
.children(cur_node_str)
.map_err(|e| PyErr::new::<TaxonomyError, _>(format!("{}", e)))?;
let children: Vec<_> = children
.into_iter()
.map(|c| tax.tax.to_internal_index(c).unwrap())
.collect();
drop(tax);
if !children.is_empty() {
slf.nodes_left.extend(children);
}
cur_node };
if node_visited != traverse_preorder {
let tax: PyRef<Taxonomy> = slf.t.extract(py)?;
return Ok(Some(
tax.tax
.from_internal_index(node)
.map_err(|e| PyErr::new::<TaxonomyError, _>(format!("{}", e)))?
.to_string(),
));
}
}
}
}
/// Python module `taxonomy`: registers the `Taxonomy` and `TaxonomyNode`
/// classes and the `TaxonomyError` exception.
#[pymodule]
fn taxonomy(py: Python, m: &PyModule) -> PyResult<()> {
    m.add_class::<Taxonomy>()?;
    m.add_class::<TaxonomyNode>()?;
    let error_type = py.get_type::<TaxonomyError>();
    m.add("TaxonomyError", error_type)
}