1#![crate_name = "basiccms"]
2#![crate_type = "lib"]
3
4extern crate rand;
5
6use std::collections::hash_map::DefaultHasher;
7use std::hash::{ Hasher, Hash };
8
9pub struct Sketch {
10 width : usize,
11 depth : usize,
12 buckets: Vec<Vec<u64>>,
13 hash_offsets: Vec<u64>
14}
15
16impl Sketch {
17
18 pub fn add<T: IntoSketch>(&mut self, val: T) -> &Sketch {
19 for row in 0..self.depth {
20 let key = self.index(&val, row);
21 let value = self.buckets[row][key];
22
23 self.buckets[row][key] = value + 1;
24 }
25
26 self
27 }
28
29 pub fn point<T: IntoSketch>(&mut self, val: T) -> u64 {
30 let mut counts = Vec::with_capacity(self.depth);
31
32 for row in 0..self.depth {
33 let key = self.index(&val, row);
34 let value = self.buckets[row][key];
35 counts.push(value);
36 }
37
38 *counts.iter().min().unwrap()
39 }
40
41 pub fn new(epsilon: f64, delta: f64) -> Sketch {
42 if epsilon < 0.0 {
43 panic!("CMS: epsilon must be positive");
44 }
45
46 if delta < 0.0 || delta > 1.0 {
47 panic!("CMS: delta must be in (0.0, 1.0)");
48 }
49
50 let width = (std::f64::consts::E / epsilon).ceil() as usize;
51 let depth = (1.0 / delta).ln().ceil() as usize;
52 let hash_offsets = Sketch::make_hash_offsets(depth);
53
54 Sketch {
55 width: width,
56 depth: depth,
57 buckets: vec![vec![0; width]; depth],
58 hash_offsets: hash_offsets
59 }
60 }
61
62 fn index<T: IntoSketch>(&mut self, val: &T, row: usize) -> usize {
63 let sketched = val.asu64();
64 let offset = self.hash_offsets[row];
65 let hashed = sketched.wrapping_mul(offset);
66
67 (hashed % (self.width as u64)) as usize
68 }
69
70 fn make_hash_offsets(depth: usize) -> Vec<u64> {
71 let mut hashers: Vec<u64> = Vec::with_capacity(depth);
72
73 for _ in 0..depth {
74 hashers.push(rand::random::<u64>());
75 }
76
77 hashers
78 }
79}
80
81fn elementwise_sum(left: &Vec<Vec<u64>>, right: &Vec<Vec<u64>>) -> Vec<Vec<u64>> {
82 left.iter()
83 .zip(right.iter())
84 .map(|(l, r)| l.iter().zip(r.iter()).map(|(x, y)| x + y).collect())
85 .collect()
86}
87
88impl<'a> std::ops::Add<&'a Sketch> for &'a Sketch {
89 type Output = Sketch;
90
91 fn add(self, other: &'a Sketch) -> Sketch {
92 assert_eq!(self.width, other.width);
93 assert_eq!(self.depth, other.depth);
94 assert_eq!(self.hash_offsets, other.hash_offsets);
95
96 Sketch {
97 width: self.width,
98 depth: self.depth,
99 hash_offsets: self.hash_offsets.clone(),
100 buckets: elementwise_sum(&self.buckets, &other.buckets)
101 }
102 }
103}
104
105impl std::clone::Clone for Sketch {
106 fn clone(&self) -> Sketch {
107 Sketch {
108 width: self.width,
109 depth: self.depth,
110 hash_offsets: self.hash_offsets.clone(),
111 buckets: self.buckets.clone()
112 }
113 }
114}
115
116pub trait IntoSketch {
117 fn asu64(&self) -> u64;
118}
119
120impl IntoSketch for u64 {
121 fn asu64(&self) -> u64 {
122 *self
123 }
124}
125
126impl IntoSketch for String {
127 fn asu64(&self) -> u64 {
128 let mut hasher = DefaultHasher::new();
129 self.hash(&mut hasher);
130
131 hasher.finish()
132 }
133}
134
135impl<'a> IntoSketch for &'a str {
136 fn asu64(&self) -> u64 {
137 let mut hasher = DefaultHasher::new();
138 self.hash(&mut hasher);
139
140 hasher.finish()
141 }
142}