use crate::errors::TaxonomyResult;
use crate::rank::TaxRank;
use serde_json::Value;
use std::borrow::Cow;
use std::collections::{HashMap, VecDeque};
use std::fmt::{Debug, Display};
pub trait Taxonomy<'t, T: 't>
where
T: Clone + Debug + Display + PartialEq,
{
fn root(&'t self) -> T;
fn children(&'t self, tax_id: T) -> TaxonomyResult<Vec<T>>;
fn descendants(&'t self, tax_id: T) -> TaxonomyResult<Vec<T>>;
fn parent(&'t self, tax_id: T) -> TaxonomyResult<Option<(T, f32)>>;
fn lineage(&'t self, tax_id: T) -> TaxonomyResult<Vec<T>> {
let mut parents = Vec::new();
let mut last_parent = tax_id.clone();
parents.push(tax_id);
while let Some(p) = self.parent(last_parent)? {
last_parent = p.0.clone();
parents.push(p.0);
}
Ok(parents)
}
fn parent_at_rank(&'t self, tax_id: T, rank: TaxRank) -> TaxonomyResult<Option<(T, f32)>> {
if self.rank(tax_id.clone())? == rank {
return Ok(Some((tax_id, 0.0)));
}
let mut cur_id = tax_id;
let mut dists = Vec::new();
while let Some(p) = self.parent(cur_id)? {
dists.push(p.1);
if self.rank(p.0.clone())? == rank {
return Ok(Some((p.0, dists.into_iter().sum())));
}
cur_id = p.0.clone();
}
Ok(None)
}
fn lca(&'t self, id1: T, id2: T) -> TaxonomyResult<T> {
let mut id1_parents = VecDeque::new();
id1_parents.push_front(id1);
while let Some(p) = self.parent(id1_parents.front().unwrap().clone())? {
id1_parents.push_front(p.0);
}
let mut id2_parents = VecDeque::new();
id2_parents.push_front(id2);
while let Some(p) = self.parent(id2_parents.front().unwrap().clone())? {
id2_parents.push_front(p.0);
}
let mut common = self.root();
for (pid1, pid2) in id1_parents.into_iter().zip(id2_parents.into_iter()) {
if pid1 != pid2 {
break;
}
common = pid1;
}
Ok(common)
}
fn name(&'t self, tax_id: T) -> TaxonomyResult<&str>;
fn data(&'t self, _tax_id: T) -> TaxonomyResult<Cow<'t, HashMap<String, Value>>> {
Ok(Cow::Owned(HashMap::new()))
}
fn rank(&'t self, tax_id: T) -> TaxonomyResult<TaxRank>;
fn traverse(&'t self, node: T) -> TaxonomyResult<TaxonomyIterator<'t, T>>
where
Self: Sized,
{
Ok(TaxonomyIterator::new(self, node))
}
fn len(&'t self) -> usize
where
Self: Sized,
{
self.traverse(self.root()).unwrap().count() / 2
}
fn is_empty(&'t self) -> bool
where
Self: Sized,
{
self.len() == 0
}
}
pub struct TaxonomyIterator<'t, T: 't> {
nodes_left: Vec<T>,
visited_nodes: Vec<T>,
tax: &'t dyn Taxonomy<'t, T>,
}
impl<'t, T> TaxonomyIterator<'t, T> {
pub fn new(tax: &'t dyn Taxonomy<'t, T>, root_node: T) -> Self {
TaxonomyIterator {
nodes_left: vec![root_node],
visited_nodes: vec![],
tax,
}
}
}
impl<'t, T> Iterator for TaxonomyIterator<'t, T>
where
T: Clone + Debug + Display + PartialEq,
{
type Item = (T, bool);
fn next(&mut self) -> Option<Self::Item> {
if self.nodes_left.is_empty() {
return None;
}
let cur_node = self.nodes_left.last().unwrap().clone();
let node_visited = {
let last_visited = self.visited_nodes.last();
Some(&cur_node) == last_visited
};
if node_visited {
self.visited_nodes.pop();
Some((self.nodes_left.pop().unwrap(), false))
} else {
self.visited_nodes.push(cur_node.clone());
let children = self.tax.children(cur_node.clone()).unwrap();
if !children.is_empty() {
self.nodes_left.extend(children);
}
Some((cur_node, true))
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use super::*;
use std::collections::HashSet;
pub(crate) struct MockTax;
impl<'t> Taxonomy<'t, u32> for MockTax {
fn root(&self) -> u32 {
1
}
fn children(&self, tax_id: u32) -> TaxonomyResult<Vec<u32>> {
Ok(match tax_id {
1 => vec![131567],
131567 => vec![2],
2 => vec![1224],
1224 => vec![1236],
1236 => vec![135622, 135613], 135622 => vec![22],
22 => vec![62322],
62322 => vec![56812], 135613 => vec![1046],
1046 => vec![53452],
53452 => vec![61598], 61598 => vec![765909],
_ => vec![],
})
}
fn descendants(&'t self, tax_id: u32) -> TaxonomyResult<Vec<u32>> {
let children: HashSet<u32> = self
.traverse(tax_id)?
.map(|(n, _)| n)
.filter(|n| *n != tax_id)
.collect();
let mut children: Vec<u32> = children.into_iter().collect();
children.sort_unstable();
Ok(children)
}
fn parent(&self, tax_id: u32) -> TaxonomyResult<Option<(u32, f32)>> {
Ok(match tax_id {
131567 => Some((1, 1.)),
2 => Some((131567, 1.)),
1224 => Some((2, 1.)),
1236 => Some((1224, 1.)), 135622 => Some((1236, 1.)),
22 => Some((135622, 1.)),
62322 => Some((22, 1.)),
56812 => Some((62322, 1.)), 135613 => Some((1236, 1.)),
1046 => Some((135613, 1.)),
53452 => Some((1046, 1.)),
61598 => Some((53452, 1.)), 765909 => Some((61598, 1.)), _ => None,
})
}
fn name(&self, tax_id: u32) -> TaxonomyResult<&str> {
Ok(match tax_id {
1 => "root",
131567 => "cellular organisms",
2 => "Bacteria",
1224 => "Proteobacteria",
1236 => "Gammaproteobacteria",
135613 => "Chromatiales",
1046 => "Chromatiaceae",
53452 => "Lamprocystis",
61598 => "Lamprocystis purpurea",
765909 => "Lamprocystis purpurea DSM 4197",
_ => "",
})
}
fn rank(&self, tax_id: u32) -> TaxonomyResult<TaxRank> {
Ok(match tax_id {
2 => TaxRank::Superkingdom,
1224 => TaxRank::Phylum,
1236 => TaxRank::Class,
135613 => TaxRank::Order,
1046 => TaxRank::Family,
53452 => TaxRank::Genus,
61598 => TaxRank::Species,
_ => TaxRank::Unspecified,
})
}
}
#[test]
fn test_len() {
let tax = MockTax;
assert_eq!(tax.root(), 1);
assert_eq!(tax.len(), 14);
assert_eq!(tax.is_empty(), false);
}
#[test]
fn test_descendants() {
let tax = MockTax;
assert_eq!(
tax.descendants(2).unwrap(),
vec![22, 1046, 1224, 1236, 53452, 56812, 61598, 62322, 135613, 135622, 765909]
);
}
#[test]
fn test_lca() {
let tax = MockTax;
assert_eq!(tax.lca(56812, 22).unwrap(), 22);
assert_eq!(tax.lca(56812, 765909).unwrap(), 1236);
}
#[test]
fn test_lineage() {
let tax = MockTax;
assert_eq!(tax.lineage(1).unwrap(), vec![1]);
assert_eq!(
tax.lineage(61598).unwrap(),
vec![61598, 53452, 1046, 135613, 1236, 1224, 2, 131567, 1]
);
}
#[test]
fn test_parent_at_rank() {
let tax = MockTax;
assert_eq!(
tax.parent_at_rank(765909, TaxRank::Genus).unwrap(),
Some((53452, 2.))
);
assert_eq!(
tax.parent_at_rank(765909, TaxRank::Class).unwrap(),
Some((1236, 5.))
);
assert_eq!(
tax.parent_at_rank(1224, TaxRank::Phylum).unwrap(),
Some((1224, 0.))
);
assert_eq!(tax.parent_at_rank(1224, TaxRank::Genus).unwrap(), None,);
}
#[test]
fn test_traversal() {
let tax = MockTax;
let mut visited = HashSet::new();
let n_nodes = tax
.traverse(tax.root())
.unwrap()
.enumerate()
.map(|(ix, (tid, pre))| match ix {
0 => {
assert_eq!(tid, 1, "root is first");
assert_eq!(pre, true, "preorder visits happen first");
}
27 => {
assert_eq!(tid, 1, "root is last too");
assert_eq!(pre, false, "postorder visits happen last");
}
_ => {
if pre {
visited.insert(tid);
} else {
assert!(visited.contains(&tid));
}
}
})
.count();
assert_eq!(n_nodes, 28, "Each node appears twice");
}
}