libbytestat/
lib.rs

//! # Bytestat
//!
//! Bytestat is a crate to measure the randomness of data.
//! Data is processed one byte at a time, sequentially.
//! The distribution and the interval of each byte value are measured.
//! Five metrics are used to measure different aspects of the set.
//! The final score is an `f64` between 0 and 100.
//! Good quality random data should score 100 when rounded up.
/// Accumulates per-byte statistics over a stream and scores its randomness.
///
/// Bytes are fed one at a time through [`Bytestat::analyze`]; the score
/// getters lazily recompute all metrics whenever new bytes were analyzed
/// since the previous computation.
pub struct Bytestat {
    /// Total number of bytes analyzed so far.
    counter: u128,
    /// Occurrence count of each byte value.
    dist: [u128; 256],
    /// Histogram of distances between consecutive occurrences of the same
    /// byte value, indexed by distance. Kept on the heap: 65 536 `u128`
    /// slots are 1 MiB, which is too large to store inline safely.
    interval: Vec<u128>,
    /// Value of `counter` when each byte value was last seen (0 = never).
    last: [u128; 256],
    /// Value of `counter` at the time the cached scores were computed.
    score_counter: u128,
    score_non_zero: f64,
    score_unique: f64,
    score_amplitude: f64,
    score_interval_continuity: f64,
    score_interval_amplitude: f64,
    score: f64,
}

impl Bytestat {
    /// Number of slots in the distance histogram.
    const INTERVAL_SLOTS: usize = 256 * 256;

    /// Largest distance that has its own histogram slot; longer distances
    /// are accumulated in this slot as well.
    const MAX_INTERVAL: u128 = u16::MAX as u128;

    /// Create new Bytestat object.
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let stats = Bytestat::new();
    /// ```
    pub fn new() -> Bytestat {
        Bytestat {
            counter: 0,
            dist: [0; 256],
            interval: vec![0; Self::INTERVAL_SLOTS],
            last: [0; 256],
            score_counter: 0,
            score_non_zero: 0.0,
            score_unique: 0.0,
            score_amplitude: 0.0,
            score_interval_continuity: 0.0,
            score_interval_amplitude: 0.0,
            score: 0.0,
        }
    }

    /// Analyze one byte, bytes must be analyzed in sequence.
    /// If bytes are not analyzed in sequence, the final score will not be valid.
    /// Repeat as needed.
    ///
    /// The distance to the previous occurrence of the same byte value is
    /// recorded; for the first occurrence the distance is the position of
    /// the byte in the stream. Distances of 65 535 or more all land in the
    /// last histogram slot instead of wrapping around.
    ///
    /// # Arguments
    ///
    /// * `value` - A byte to be analyzed, u8
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    /// ```
    pub fn analyze(&mut self, value: u8) {
        let slot = usize::from(value);
        self.counter += 1;
        self.dist[slot] += 1;

        // Saturate rather than truncate: a plain `as u16` cast would file a
        // very long distance under an unrelated short one.
        let gap = self.counter - self.last[slot];
        let bucket = gap.min(Self::MAX_INTERVAL) as usize;
        self.interval[bucket] += 1;

        self.last[slot] = self.counter;
    }

    /// Recompute every cached score if bytes were analyzed since the last
    /// computation; otherwise do nothing.
    fn update_scores(&mut self) {
        if self.score_counter == self.counter {
            return;
        }

        // 1 of 5: share of byte values that occurred at least once.
        let present = self.dist.iter().filter(|&&count| count > 0).count();
        self.score_non_zero = present as f64 / 256.0;

        // 2 of 5: share of byte values whose occurrence count is not shared
        // with any other byte value.
        let mut tally: std::collections::HashMap<u128, u32> = std::collections::HashMap::new();
        for &count in self.dist.iter() {
            *tally.entry(count).or_insert(0) += 1;
        }
        let singular = tally.values().filter(|&&times| times == 1).count();
        self.score_unique = singular as f64 / 256.0;

        // 3 of 5: smallest occurrence count relative to the largest one.
        // `counter` is non-zero here (the early return covers the empty
        // case), so the largest count is at least 1.
        let lowest = self.dist.iter().copied().min().unwrap_or(0);
        let highest = self.dist.iter().copied().max().unwrap_or(0);
        self.score_amplitude = lowest as f64 / highest as f64;

        // 4 of 5: a distance is significant when it occurred more often
        // than 1/4096 of the analyzed bytes. Distance 0 is never scanned.
        let threshold = self.counter / 4096;
        let interval_max = self.interval[1..]
            .iter()
            .rposition(|&hits| hits > threshold)
            .map_or(0, |offset| offset + 1);

        // The largest significant distance is itself populated, so counting
        // starts at 1 and only the smaller distances are scanned.
        let populated = 1 + self
            .interval
            .iter()
            .take(interval_max)
            .skip(1)
            .filter(|&&hits| hits > threshold)
            .count();
        self.score_interval_continuity = populated.min(512) as f64 / 512.0;

        // 5 of 5: largest significant distance, capped at 512.
        self.score_interval_amplitude = interval_max.min(512) as f64 / 512.0;

        // Final score: each of the five metrics contributes up to 20 points.
        self.score = self.score_non_zero * 20.0;
        self.score += self.score_unique * 20.0;
        self.score += self.score_amplitude * 20.0;
        self.score += self.score_interval_continuity * 20.0;
        self.score += self.score_interval_amplitude * 20.0;

        self.score_counter = self.counter;
    }

    /// Generate the score based on distribution of unique bytes being present in the set.
    /// The score is between 0.0 and 1.0.
    ///
    /// (unique byte values present in set) / (maximum number of possible unique bytes, 256)
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_score_non_zero(), 1.0);
    /// ```
    pub fn get_score_non_zero(&mut self) -> f64 {
        self.update_scores();
        self.score_non_zero
    }

    /// Generate the score based on the uniqueness of the bytes distribution in the set.
    /// The score is between 0.0 and 1.0. Any score lower than 0.99 should be considered problematic.
    ///
    /// (byte values whose occurrence count no other byte value shares) / (maximum number of possible unique bytes, 256)
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// // Every byte value occurred exactly once, so no count is unique.
    /// assert_eq!(stats.get_score_unique(), 0.0);
    /// ```
    pub fn get_score_unique(&mut self) -> f64 {
        self.update_scores();
        self.score_unique
    }

    /// Generate the score based on the amplitude of the bytes distribution in the set.
    /// The score is between 0.0 and 1.0.
    /// Any score lower than 0.99 should be considered problematic.
    ///
    /// 1 - ((bytes count max) - (bytes count min)) / (bytes count max),
    /// which is the same as (bytes count min) / (bytes count max)
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_score_amplitude(), 1.0);
    /// ```
    pub fn get_score_amplitude(&mut self) -> f64 {
        self.update_scores();
        self.score_amplitude
    }

    /// Generate the sub score based on the continuity of significant interval measurements.
    /// The score is between 0 and 1.
    /// Any score lower than 0.99 should be considered problematic.
    ///
    /// An interval is significant when it occurred more often than 1/4096 of the analyzed bytes.
    ///
    /// min( significant intervals in 1 ..= interval_largest, 512 ) / 512
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_score_interval_continuity(), 0.5);
    /// ```
    pub fn get_score_interval_continuity(&mut self) -> f64 {
        self.update_scores();
        self.score_interval_continuity
    }

    /// Generate the score based on the amplitude of significant interval measurements relative to twice the range of byte.
    /// The score is between 0.0 and 1.0. Any score lower than 1.0 should be considered problematic.
    ///
    /// min( interval_largest, 512 ) / 512
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_score_interval_amplitude(), 0.5);
    /// ```
    pub fn get_score_interval_amplitude(&mut self) -> f64 {
        self.update_scores();
        self.score_interval_amplitude
    }

    /// Generate the final score based on the 5 individual tests.
    /// Score between 0 and 100. 99 or lower is very problematic.
    ///
    /// Each individual score contributes up to 20 points.
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_score(), 60.0);
    /// ```
    pub fn get_score(&mut self) -> f64 {
        self.update_scores();
        self.score
    }

    /// Return all scores at once.
    ///
    /// The order is: non-zero, unique, amplitude, interval continuity,
    /// interval amplitude, final score.
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_scores_array(), [1.0, 0.0, 1.0, 0.5, 0.5, 60.0]);
    /// ```
    pub fn get_scores_array(&mut self) -> [f64; 6] {
        [
            self.get_score_non_zero(),
            self.get_score_unique(),
            self.get_score_amplitude(),
            self.get_score_interval_continuity(),
            self.get_score_interval_amplitude(),
            self.get_score(),
        ]
    }

    /// Return all scores as text, in the same order as
    /// [`Bytestat::get_scores_array`], joined by `seperator`.
    ///
    /// # Arguments
    ///
    /// * `seperator` - Text placed between two consecutive scores
    ///
    /// # Examples
    ///
    /// ```
    /// use bytestat::Bytestat;
    /// let mut stats = Bytestat::new();
    ///
    /// for byte in 0..=255u8 {
    ///     stats.analyze(byte);
    /// }
    ///
    /// assert_eq!(stats.get_scores_string(";"), "1;0;1;0.5;0.5;60");
    /// ```
    pub fn get_scores_string(&mut self, seperator: &str) -> String {
        self.get_scores_array()
            .iter()
            .map(|score| score.to_string())
            .collect::<Vec<String>>()
            .join(seperator)
    }
}

impl Default for Bytestat {
    /// Same as [`Bytestat::new`].
    fn default() -> Self {
        Self::new()
    }
}