1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
//! Defines aggregation operations over a taxon tree.

pub mod lineage;
pub mod rank;

use std::collections::HashMap;

use crate::taxon;
use crate::taxon::TaxonId;

/// A strategy for reducing a collection of taxons to one representative taxon.
pub trait Aggregator {
    /// Reduces a table mapping taxon ids to scores to a single taxon id.
    fn aggregate(&self, taxons: &HashMap<TaxonId, f32>) -> Result<TaxonId>;

    /// Reduces a plain list of taxon ids to a single taxon id.
    ///
    /// Every occurrence in `taxons` is given a score of 1.0; the scores are
    /// summed per taxon with [`count`] and the resulting table is handed to
    /// [`Aggregator::aggregate`].
    fn counting_aggregate(&self, taxons: &[TaxonId]) -> Result<TaxonId> {
        let unit_scored = taxons.iter().map(|&id| (id, 1.0));
        let frequencies = count(unit_scored);
        self.aggregate(&frequencies)
    }
}

/// Marker trait for aggregators that are also `Send` and `Sync`, so that one
/// instance can be reused across multiple threads. It declares no methods of
/// its own.
pub trait MultiThreadSafeAggregator: Aggregator + Send + Sync {}

/// Accumulates `(taxon, score)` pairs into a table keyed by taxon.
///
/// Pairs sharing the same taxon id have their scores added together, in
/// iteration order. When every score is 1.0 the table therefore holds the
/// number of occurrences of each taxon.
pub fn count<T>(taxons: T) -> HashMap<TaxonId, f32>
where
    T: Iterator<Item = (TaxonId, f32)>,
{
    let empty: HashMap<TaxonId, f32> = HashMap::new();
    taxons.fold(empty, |mut totals, (id, score)| {
        // A taxon seen for the first time starts from a zero total.
        *totals.entry(id).or_insert(0.0) += score;
        totals
    })
}

/// Removes every taxon whose frequency is strictly below `lower_bound`.
///
/// Entries whose frequency equals `lower_bound` are kept. A NaN frequency (or
/// a NaN bound) never satisfies the comparison, so such entries are removed.
///
/// The table is taken by value and pruned in place, which avoids building a
/// second map for the surviving entries.
pub fn filter(mut freq_table: HashMap<TaxonId, f32>, lower_bound: f32) -> HashMap<TaxonId, f32> {
    freq_table.retain(|_, freq| *freq >= lower_bound);
    freq_table
}

// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
    // Errors from the taxon module are wrapped in the `Taxon` variant.
    links {
        Taxon(taxon::Error, taxon::ErrorKind) #[doc = "Taxon"];
    }
    errors {
        /// Aggregation called on an empty list
        EmptyInput {
            description("Aggregation called on an empty list")
            display("Aggregation called on an empty list")
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::fixtures;
    use crate::rmq;
    use crate::taxon::TaxonList;
    use crate::tree;

    /// Builds one boxed instance of every aggregator variant under test, all
    /// constructed from the shared fixtures, so each test below can run the
    /// same assertions against every implementation.
    ///
    /// The mix calculators are instantiated three times each, with the float
    /// arguments 0.0, 1.0 and 0.5.
    fn aggregators(by_id: &TaxonList) -> Vec<Box<dyn Aggregator>> {
        vec![
            Box::new(rmq::lca::LCACalculator::new(fixtures::tree())),
            Box::new(rmq::rtl::RTLCalculator::new(fixtures::ROOT, by_id)),
            Box::new(rmq::mix::MixCalculator::new(fixtures::tree(), 0.0)),
            Box::new(rmq::mix::MixCalculator::new(fixtures::tree(), 1.0)),
            Box::new(rmq::mix::MixCalculator::new(fixtures::tree(), 0.5)),
            Box::new(tree::lca::LCACalculator::new(fixtures::ROOT, by_id)),
            Box::new(tree::mix::MixCalculator::new(fixtures::ROOT, by_id, 0.0)),
            Box::new(tree::mix::MixCalculator::new(fixtures::ROOT, by_id, 1.0)),
            Box::new(tree::mix::MixCalculator::new(fixtures::ROOT, by_id, 0.5)),
        ]
    }

    /// Aggregating an empty list must fail with `ErrorKind::EmptyInput`.
    #[test]
    fn test_empty_query() {
        for aggregator in aggregators(&fixtures::by_id()) {
            assert_matches!(
                *aggregator.counting_aggregate(&[]).unwrap_err().kind(),
                ErrorKind::EmptyInput
            );
        }
    }

    /// Aggregating a single fixture taxon must return that same taxon.
    #[test]
    fn test_singleton_is_singleton() {
        for aggregator in aggregators(&fixtures::by_id()) {
            for taxon in fixtures::taxon_list() {
                assert_matches!(
                    aggregator.counting_aggregate(&[taxon.id]),
                    Ok(tid) if tid == taxon.id
                );
            }
        }
    }

    /// A query containing taxon id 5 must be reported as `UnknownTaxon(5)`,
    /// both when it is the only id and when it is mixed with other ids.
    #[test]
    fn test_invalid_taxa() {
        for aggregator in aggregators(&fixtures::by_id()) {
            assert_matches!(
                *aggregator.counting_aggregate(&[5]).unwrap_err().kind(),
                ErrorKind::Taxon(taxon::ErrorKind::UnknownTaxon(5))
            );
            assert_matches!(
                *aggregator
                    .counting_aggregate(&[1, 2, 5, 1])
                    .unwrap_err()
                    .kind(),
                ErrorKind::Taxon(taxon::ErrorKind::UnknownTaxon(5))
            );
        }
    }
}