1use rand::Rng;
2use std::collections::HashSet;
3use std::fs::File;
4use std::io::{self, BufRead};
5use std::path::PathBuf;
6
7pub fn estimate_from_many(paths: &[PathBuf], delta: f64, epsilon: f64) -> io::Result<usize> {
8 let m = count_rows(paths)?;
9
10 let mut rng = rand::thread_rng();
11
12 let mut x: HashSet<String> = HashSet::new();
13 let mut p: f64 = 1.0;
14
15 let thresh = ((12.0 / epsilon.powi(2)) * (8.0 * m as f64 / delta).log2()).ceil() as usize;
16
17 for path in paths {
18 let file = File::open(path)?;
19 let reader = io::BufReader::new(file);
20
21 for line in reader.lines() {
22 let el = line?;
23 x.remove(&el);
24
25 if rng.gen_bool(p) {
26 x.insert(el.clone());
27 }
28
29 if x.len() == thresh {
30 x.retain(|_| rng.gen_bool(0.5));
31 p /= 2.0;
32 }
33 }
34 }
35
36 Ok((x.len() as f64 / p) as usize)
37}
38
/// Counts the total number of lines across all files in `paths`.
///
/// # Errors
///
/// Returns the first I/O error hit while opening or reading a file. This
/// includes lines that are not valid UTF-8, which `BufRead::lines` reports as
/// an error.
fn count_rows(paths: &[PathBuf]) -> io::Result<usize> {
    let mut total_lines = 0usize;
    for path in paths {
        let reader = io::BufReader::new(File::open(path)?);
        for line in reader.lines() {
            // Propagate read failures instead of counting them as lines; a
            // reader that keeps failing would otherwise never finish.
            line?;
            total_lines += 1;
        }
    }
    Ok(total_lines)
}
48
49pub fn estimate(source: Vec<String>, delta: f64, epsilon: f64) -> usize {
50 let mut rng = rand::thread_rng();
51
52 let mut x: HashSet<String> = HashSet::new();
53 let mut p: f64 = 1 as f64;
54 let m = source.len();
55
56 let thresh = ((12.0 / epsilon.powi(2)) * (8.0 * m as f64 / delta).log2()).ceil() as usize;
57
58
59 for el in source.iter() {
60 x.remove(el);
61
62 if rng.gen_bool(p) {
63 x.insert(el.to_owned());
64 }
65
66 if x.len() == thresh {
67 x.retain(|_| rng.gen_bool(0.5));
68 p = p/2.0;
69 }
70 }
71
72 (x.len() as f64 / p) as usize
73}