1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
use std::cmp::{PartialOrd, Ordering};
use std::fmt;
use std::fmt::{Display, Formatter};
use std::iter::FromIterator;
use std::ops::{Index, IndexMut};

use classifier;
use data::{Rating, Item, MAX_RATING};

/// An average value together with its standard deviation.
///
/// Equality is derived and compares both fields. Ordering and textual
/// formatting come from the `PartialOrd` and `Display` implementations of
/// this type.
#[derive(PartialEq)]
pub struct Interval {
    /// Mean value.
    pub avg: f32,
    /// Standard deviation belonging to `avg`.
    pub stdev: f32,
}

impl Interval {
    /// Tells whether this interval holds no usable value, i.e. whether its
    /// average is NaN.
    ///
    /// # Panics
    ///
    /// Panics when exactly one of `avg` and `stdev` is NaN: the two fields
    /// are required to be NaN together or not at all.
    pub fn is_nan(&self) -> bool {
        // Consistency check: both fields must agree on their NaN-ness.
        assert!(self.avg.is_nan() == self.stdev.is_nan());
        f32::is_nan(self.avg)
    }
}

impl PartialOrd for Interval {
    /// Orders intervals by their average first; when the averages are equal,
    /// the interval with the smaller standard deviation ranks higher.
    ///
    /// Returns `None` when the pair of fields that decides the comparison
    /// cannot be ordered (a NaN is involved).
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match self.avg.partial_cmp(&other.avg) {
            // Tie on the average: compare the deviations with the operands
            // swapped, so that a tighter spread counts as the greater value.
            Some(Ordering::Equal) => other.stdev.partial_cmp(&self.stdev),
            decided => decided,
        }
    }
}

impl Display for Interval {
    /// Renders the interval as `avg±stdev`, each number printed with two
    /// digits after the decimal point.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let (avg, stdev) = (self.avg, self.stdev);
        f.write_fmt(format_args!("{:.2}±{:.2}", avg, stdev))
    }
}

/// Aggregate figures derived from a `Histogram` by `Histogram::get_stats`.
pub struct Stats {
    /// Number of all counted items: the rated ones plus those counted in the
    /// histogram's slot 0.
    pub total: usize,
    /// Number of items counted in the rating slots 1 and above.
    pub rated: usize,
    /// Mean and standard deviation of the ratings over the rated items.
    /// Both are NaN when `rated` is 0.
    pub rating: Interval,
}

/// Per-rating item counters.
///
/// The counters are addressed through `Option<Rating>`: `None` selects slot 0
/// and `Some(r)` selects slot `r`. Note that `Some(0)` therefore shares slot 0
/// with `None`.
pub struct Histogram {
    // One counter for every rating value up to `MAX_RATING`, plus slot 0.
    ratings: [usize; MAX_RATING as usize + 1],
}

impl Histogram {
    /// Finds the rating that was given most often.
    ///
    /// Returns `(rating, count)`. Slot 0 is not considered. Only a strictly
    /// larger count replaces the current best, so on a tie the lowest rating
    /// wins, and `(0, 0)` is returned when no rating slot has any item.
    pub fn get_max_rated(&self) -> (Rating, usize) {
        let mut best: (Rating, usize) = (0, 0);
        for (rating, &num) in self.ratings.iter().enumerate().skip(1) {
            if num > best.1 {
                best = (rating as Rating, num);
            }
        }
        best
    }

    /// Computes the item counts plus the mean and standard deviation of the
    /// ratings.
    ///
    /// Only slots 1 and above contribute to `rated`, the mean and the
    /// deviation; slot 0 is added to `total` only. The deviation is the
    /// population form (divided by the number of rated items). When no item
    /// is rated, both divisions are `0 / 0`, so `avg` and `stdev` are NaN.
    pub fn get_stats(&self) -> Stats {
        // First pass: number of rated items and the sum of their ratings.
        let mut rated = 0;
        let mut rating_sum = 0;
        for (rating, &num) in self.ratings.iter().enumerate().skip(1) {
            rated += num;
            rating_sum += rating * num;
        }
        let avg = rating_sum as f32 / rated as f32;

        // Second pass: squared distance from the mean, weighted by count.
        let mut squared_dev = 0f32;
        for (rating, &num) in self.ratings.iter().enumerate().skip(1) {
            squared_dev += num as f32 * (rating as f32 - avg).powf(2.0);
        }
        let var = squared_dev / rated as f32;

        let rating = Interval {
            avg: avg,
            stdev: var.sqrt(),
        };
        Stats {
            total: rated + self.ratings[0],
            rated: rated,
            rating: rating,
        }
    }
}

impl<'a> FromIterator<&'a Item> for Histogram {
    /// Builds a histogram by counting every item under its `rating`.
    ///
    /// Starts from all-zero counters and increments the counter selected by
    /// each item's `rating` through the `IndexMut<Option<Rating>>`
    /// implementation of `Histogram`.
    fn from_iter<I>(iter: I) -> Self
        where I: IntoIterator<Item = &'a Item>
    {
        let empty = Histogram { ratings: [0; MAX_RATING as usize + 1] };
        iter.into_iter().fold(empty, |mut hist, item| {
            hist[item.rating] += 1;
            hist
        })
    }
}

impl Index<Option<Rating>> for Histogram {
    type Output = usize;

    /// Gives read access to the counter selected by `rating`: `None` selects
    /// slot 0, `Some(r)` selects slot `r`.
    ///
    /// # Panics
    ///
    /// Panics when the selected slot lies outside the counter array.
    fn index(&self, rating: Option<Rating>) -> &Self::Output {
        let slot = rating.map_or(0, |value| value as usize);
        &self.ratings[slot]
    }
}

impl IndexMut<Option<Rating>> for Histogram {
    /// Gives write access to the counter selected by `rating`: `None` selects
    /// slot 0, `Some(r)` selects slot `r`.
    ///
    /// # Panics
    ///
    /// Panics when the selected slot lies outside the counter array.
    fn index_mut(&mut self, rating: Option<Rating>) -> &mut Self::Output {
        let slot = rating.map(|value| value as usize).unwrap_or(0);
        &mut self.ratings[slot]
    }
}

/// Rating statistics for the items grouped under one tag, as produced by
/// `generate_tag_stats`.
pub struct TagStats {
    /// The tag the statistics belong to.
    pub tag: String,
    /// Statistics computed from the histogram of that tag's items.
    pub stats: Stats,
}

/// Computes rating statistics for every tag and returns them best first.
///
/// The items are grouped by `classifier::classify_by_tags`; each group is
/// turned into a `Histogram` and summarised with `Histogram::get_stats`.
/// Tags whose rating statistics are NaN (no rated item in the group) are
/// left out. The result is sorted in descending order of `stats.rating`:
/// higher average first and, for equal averages, smaller deviation first.
pub fn generate_tag_stats(all_items: &Vec<Item>) -> Vec<TagStats> {
    let mut ranked = Vec::new();
    for (tag, items) in classifier::classify_by_tags(all_items) {
        let stats = items.into_iter().collect::<Histogram>().get_stats();
        if !stats.rating.is_nan() {
            ranked.push(TagStats {
                tag: tag,
                stats: stats,
            });
        }
    }
    // Comparing right against left yields the descending order.
    // The unwrap should be safe because every NaN rating was dropped in the
    // loop above, so the comparison always has a result.
    ranked.sort_by(|l, r| r.stats.rating.partial_cmp(&l.stats.rating).unwrap());
    ranked
}