#![allow(clippy::comparison_chain)]
use serde::{Deserialize, Serialize};
use serde_json::to_writer;
use std::fs::OpenOptions;
use std::io::{BufReader, BufWriter};
use std::path::Path;
use rand::prelude::*;
use rand_distr::Exp1;
use rand_xoshiro::Xoshiro256PlusPlus;
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
use std::marker::PhantomData;
use num::{Bounded, FromPrimitive, Integer, ToPrimitive};
use rayon::prelude::*;
#[cfg(feature = "slog")]
use argmin::core::observers::ObserverMode;
use argmin::core::{CostFunction, Executor};
use argmin::solver::goldensectionsearch::GoldenSectionSearch;
#[cfg(feature = "slog")]
use argmin_observer_slog::SlogLogger;
use anyhow::anyhow;
use crate::fyshuffle::*;
#[cfg_attr(doc, katexit::katexit)]
/// Parameters driving a [`SetSketcher`]; can be dumped to and reloaded from JSON.
///
/// The `Default` implementation uses `b = 1.001`, `m = 4096`, `a = 20` and `q = 2^16 - 2`.
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
pub struct SetSketchParams {
/// Base of the logarithm applied to sampled values when computing register values.
b: f64,
/// Number of registers (slots) in a sketch.
m: u64,
/// Rate parameter: the sketcher scales its exponential samples by `1 / a`.
a: f64,
/// Cap on register values: the sketcher never stores more than `q + 1` in a register.
q: u64,
}
impl Default for SetSketchParams {
fn default() -> Self {
SetSketchParams {
b: 1.001,
m: 4096,
a: 20.,
q: 2_u64.pow(16) - 2,
}
}
}
impl SetSketchParams {
#[cfg_attr(doc, katexit::katexit)]
pub fn new(b: f64, m: u64, a: f64, q: u64) -> Self {
SetSketchParams { b, m, a, q }
}
pub fn get_a(&self) -> f64 {
self.a
}
pub fn get_b(&self) -> f64 {
self.b
}
pub fn get_q(&self) -> u64 {
self.q
}
pub fn get_m(&self) -> u64 {
self.m
}
pub fn set_m(&mut self, nb_sketch: usize) {
self.m = nb_sketch as u64;
}
pub fn get_jaccard_bounds(&self, jac: f64) -> (f64, f64) {
assert!(jac <= 1.);
let b_aux = self.b.powf(jac * 0.5);
let jsup = (b_aux * b_aux - 1.) / (self.b - 1.);
let b_inf = 2. * (b_aux * self.b.sqrt() - 1.) / (self.b - 1.) - 1.;
let jinf = b_inf.max(0.);
log::debug!("b_inf : {:.5e}, b_aux : {:.3e}", b_inf, b_aux);
assert!(jac >= 1. || jinf <= jsup);
(jinf, jsup)
}
pub fn dump_json(&self, dirpath: &Path) -> Result<(), String> {
let filepath = dirpath.join("parameters.json");
log::info!("dumping SetSketchParams in json file : {:?}", filepath);
let fileres = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&filepath);
if fileres.is_err() {
log::error!(
"SetSketchParams dump : dump could not open file {:?}",
filepath.as_os_str()
);
println!(
"SetSketchParams dump: could not open file {:?}",
filepath.as_os_str()
);
return Err("SetSketchParams dump failed".to_string());
}
let mut writer = BufWriter::new(fileres.unwrap());
to_writer(&mut writer, &self).unwrap();
Ok(())
}
pub fn reload_json(dirpath: &Path) -> Result<Self, String> {
log::info!("in reload_json");
let filepath = dirpath.join("parameters.json");
let fileres = OpenOptions::new().read(true).open(&filepath);
if fileres.is_err() {
log::error!(
"SetSketchParams reload_json : reload could not open file {:?}",
filepath.as_os_str()
);
println!(
"SetSketchParams reload_json: could not open file {:?}",
filepath.as_os_str()
);
return Err("SetSketchParams reload_json could not open file".to_string());
}
let loadfile = fileres.unwrap();
let reader = BufReader::new(loadfile);
let hll_parameters: Self = serde_json::from_reader(reader).unwrap();
Ok(hll_parameters)
} }
/// Sketcher holding `m` registers of integer type `I`, fed with items of type `T`
/// hashed through a hasher of type `H`.
pub struct SetSketcher<I: Integer, T, H: Hasher + Default> {
/// Logarithm base `b`, copied from the parameters.
_b: f64,
/// Number of registers.
m: u64,
/// Rate parameter `a`; exponential samples are scaled by `1 / a`.
a: f64,
/// Register cap; a register value never exceeds `q + 1`.
q: u64,
/// The registers; created with `m` entries, all zero.
k_vec: Vec<I>,
/// Lower bound on register values; `sketch` stops drawing once a candidate falls to or below it.
lower_k: f64,
/// Number of register updates done by `sketch`; the minimum register is recomputed
/// each time this count reaches a multiple of `m`.
nbmin: u64,
/// Source of slot indices, reset at the start of every `sketch` call.
/// NOTE(review): presumably a Fisher-Yates shuffle, judging by the type name — confirm in `crate::fyshuffle`.
permut_generator: FYshuffle,
/// Number of times a candidate value exceeded `I::max_value()` and was clamped.
nb_overflow: u64,
/// Natural logarithm of `b`, computed once at construction.
lnb: f64,
/// Builds the hasher applied to each sketched item.
b_hasher: BuildHasherDefault<H>,
/// Ties the item type `T` to the sketcher without storing any `T`.
t_marker: PhantomData<T>,
}
impl<I, T, H> Default for SetSketcher<I, T, H>
where
    I: Integer + Bounded + ToPrimitive + FromPrimitive + Copy + Clone,
    H: Hasher + Default,
{
    /// Builds an empty sketcher from `SetSketchParams::default()` and a
    /// default-constructed hasher builder.
    fn default() -> SetSketcher<I, T, H> {
        let params = SetSketchParams::default();
        // Size the registers and the slot generator from the parameters so
        // they cannot drift away from the `m` stored in the sketcher.
        let nb_slots = params.get_m() as usize;
        let lnb = (params.get_b() - 1.).ln_1p();
        SetSketcher::<I, T, H> {
            _b: params.get_b(),
            m: params.get_m(),
            a: params.get_a(),
            q: params.get_q(),
            k_vec: vec![I::zero(); nb_slots],
            lower_k: 0.,
            nbmin: 0,
            permut_generator: FYshuffle::new(nb_slots),
            nb_overflow: 0,
            lnb,
            b_hasher: BuildHasherDefault::<H>::default(),
            t_marker: PhantomData,
        }
    }
}
impl<I, T, H> SetSketcher<I, T, H>
where
    I: Integer + ToPrimitive + FromPrimitive + Bounded + Copy + Clone + std::fmt::Debug,
    T: Hash,
    H: Hasher + Default,
{
    /// Creates an empty sketcher with `params.get_m()` zeroed registers,
    /// hashing items with `b_hasher`.
    pub fn new(params: SetSketchParams, b_hasher: BuildHasherDefault<H>) -> Self {
        let k_vec: Vec<I> = (0..params.get_m()).map(|_| I::zero()).collect();
        let lnb = (params.get_b() - 1.).ln_1p();
        SetSketcher::<I, T, H> {
            _b: params.get_b(),
            m: params.get_m(),
            a: params.get_a(),
            q: params.get_q(),
            k_vec,
            lower_k: 0.,
            nbmin: 0,
            permut_generator: FYshuffle::new(params.get_m() as usize),
            nb_overflow: 0,
            lnb,
            b_hasher,
            t_marker: PhantomData,
        }
    }

    /// Returns the logarithm base `b`.
    pub fn get_b(&self) -> f64 {
        self._b
    }

    /// Inserts one item into the sketch.
    ///
    /// The item's hash seeds a random generator. Up to `m` increasing values
    /// `x_j` are then drawn; each gives a candidate register value
    /// `k = min(q + 1, floor(1 - log_b(x_j)))`, clamped at zero, which
    /// replaces the register of a freshly drawn slot when it is larger.
    /// Drawing stops as soon as a candidate cannot exceed the current lower
    /// bound on the registers.
    ///
    /// Candidates above `I::max_value()` are stored as `I::max_value()` and
    /// counted in the overflow counter.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok(())`.
    ///
    /// # Panics
    ///
    /// Panics if `I::max_value()` does not fit in a `u64`.
    pub fn sketch(&mut self, to_sketch: &T) -> anyhow::Result<()> {
        let seed: u64 = self.b_hasher.hash_one(&to_sketch);
        let slot_max: u64 = I::max_value()
            .to_u64()
            .expect("I::max_value() must fit in a u64");
        // All draws below (exponential samples and slot indices) come from
        // this generator, seeded by the item's hash.
        let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed);
        self.permut_generator.reset();
        let k_cap: i64 = self.q as i64 + 1;
        let inv_a: f64 = 1. / self.a;
        let mut x_prev: f64 = 0.;
        for j in 0..self.m {
            // The x_j are increasing, so the candidate values k are decreasing.
            let x_j = x_prev + (inv_a / (self.m - j) as f64) * rng.sample::<f64, Exp1>(Exp1);
            x_prev = x_j;
            let lb_xj = x_j.ln() / self.lnb;
            // Candidate cannot exceed the lower bound on the registers: stop.
            if lb_xj > -self.lower_k {
                break;
            }
            let z: i64 = k_cap.min((1. - lb_xj).floor() as i64);
            log::trace!(
                "j : {}, x_j : {:.5e} , lb_xj : {:.5e}, z : {:.5e}",
                j,
                x_j,
                lb_xj,
                z
            );
            let k = z.max(0) as u64;
            if k as f64 <= self.lower_k {
                break;
            }
            let i = self.permut_generator.next(&mut rng);
            if k <= self.k_vec[i].to_u64().unwrap() {
                continue;
            }
            log::trace!("setting slot i: {}, f_k : {:.3e}", i, k);
            if k > slot_max {
                self.nb_overflow += 1;
                self.k_vec[i] = I::from_u64(slot_max).unwrap();
                log::warn!(
                    "I overflow , got a k value {:.3e} over I::max : {:#}",
                    k,
                    slot_max
                );
            } else {
                self.k_vec[i] = I::from_u64(k).unwrap();
            }
            self.nbmin += 1;
            // Every m register updates, refresh the lower bound from the
            // current minimum register.
            if self.nbmin % self.m == 0 {
                let flow = self
                    .k_vec
                    .iter()
                    .copied()
                    .min()
                    .expect("k_vec is non-empty: a slot was just written")
                    .to_f64()
                    .unwrap();
                if flow > self.lower_k {
                    log::debug!(
                        "j : {}, nbmin = {} , setting low to : {:?}",
                        j,
                        self.nbmin,
                        flow
                    );
                    self.lower_k = flow;
                }
            }
        }
        Ok(())
    }

    /// Returns the current lower bound on register values, rounded down.
    pub fn get_low_sketch(&self) -> i64 {
        self.lower_k.floor() as i64
    }

    /// Returns how many candidate values exceeded `I::max_value()` so far.
    pub fn get_nb_overflow(&self) -> u64 {
        self.nb_overflow
    }

    /// Inserts every item of `to_sketch` into the sketch, in order.
    ///
    /// # Errors
    ///
    /// Returns an error if `to_sketch` is empty, or the first error reported
    /// by [`Self::sketch`].
    pub fn sketch_slice(&mut self, to_sketch: &[T]) -> anyhow::Result<()> {
        if to_sketch.is_empty() {
            println!(" empty arg");
            return Err(anyhow!("empty sketch"));
        }
        // Propagate a sketching error to the caller instead of panicking.
        to_sketch.iter().try_for_each(|val| self.sketch(val))
    }

    /// Returns `(cardinality, rel_std_dev)`: the cardinality estimate
    /// `m * (1 - 1/b) / (a * ln(b) * sum_i b^(-k_i))` and the relative
    /// standard deviation `sqrt(((b + 1) / (b - 1) * ln(b) - 1) / m)`.
    pub fn get_cardinal_stats(&self) -> (f64, f64) {
        // b^(-k) is computed as exp(-k * ln(b)) using the cached ln(b).
        let sumbk = self.k_vec.iter().fold(0.0f64, |acc: f64, c| {
            acc + (-c.to_f64().unwrap() * self.lnb).exp()
        });
        let cardinality: f64 = self.m as f64 * (1. - 1. / self._b) / (self.a * self.lnb * sumbk);
        let rel_variance = ((self._b + 1.) / (self._b - 1.) * self.lnb - 1.) / self.m as f64;
        (cardinality, rel_variance.sqrt())
    }

    /// Resets the sketcher to its empty state: zeroed registers, cleared
    /// lower bound and counters, slot generator reset.
    pub fn reinit(&mut self) {
        self.permut_generator.reset();
        self.k_vec = (0..self.m).map(|_| I::zero()).collect();
        self.lower_k = 0.;
        self.nbmin = 0;
        self.nb_overflow = 0;
    }

    /// Merges `other` into `self` by taking the register-wise maximum and
    /// adding the overflow counters.
    ///
    /// The lower bound and update counter of `self` are left untouched; as
    /// registers can only grow here, the lower bound stays valid.
    ///
    /// # Errors
    ///
    /// Returns an error if the two sketchers differ in `m` or `q`, or if `b`
    /// or `a` differ by a relative amount of at least `f64::EPSILON`.
    pub fn merge(&mut self, other: &SetSketcher<I, T, H>) -> anyhow::Result<()> {
        if self.m != other.m || self.q != other.q {
            return Err(anyhow!("non mergeable : different sketching parameters"));
        }
        if (self._b - other._b).abs() / self._b >= f64::EPSILON
            || (self.a - other.a).abs() / self.a >= f64::EPSILON
        {
            return Err(anyhow!("non mergeable : different sketching parameters"));
        }
        for (mine, theirs) in self.k_vec.iter_mut().zip(other.k_vec.iter()) {
            *mine = (*mine).max(*theirs);
        }
        self.nb_overflow += other.nb_overflow;
        Ok(())
    }

    /// Returns the registers of the sketch.
    pub fn get_signature(&self) -> &Vec<I> {
        &self.k_vec
    }

    /// Alias of [`Self::get_signature`].
    #[inline(always)]
    pub fn get_hsketch(&self) -> &Vec<I> {
        self.get_signature()
    }
}
/// Inputs of the likelihood minimised by `MleJaccard::get_mle`.
///
/// `get_mle` fills `dplus`, `dless` and `dequal` with the number of registers
/// where the first sketch is respectively greater than, less than and equal
/// to the second one, `u` and `v` with each sketch's share of the summed
/// cardinality estimates, and `b` with the logarithm base.
#[derive(Copy, Clone, Debug)]
struct MleCost {
    dplus: f64,
    dless: f64,
    dequal: f64,
    u: f64,
    v: f64,
    b: f64,
}

impl MleCost {
    /// Stores the six inputs unchanged.
    fn new(dplus: f64, dless: f64, dequal: f64, u: f64, v: f64, b: f64) -> Self {
        Self {
            dplus,
            dless,
            dequal,
            u,
            v,
            b,
        }
    }

    /// Evaluates `-log_b(1 - x * (b - 1) / b)`.
    ///
    /// For `x <= 0` the logarithm is taken through `ln_1p`; otherwise it is
    /// taken directly.
    ///
    /// # Panics
    ///
    /// Panics if the result is NaN.
    fn pb(&self, x: f64) -> f64 {
        let ln_b = (self.b - 1.).ln_1p();
        // x * (b - 1) / b, shared by both branches (negation is exact).
        let scaled = x * (self.b - 1.) / self.b;
        let log_term = if x <= 0. {
            (-scaled).ln_1p()
        } else {
            (1. - scaled).ln()
        };
        let val = -log_term / ln_b;
        assert!(!val.is_nan());
        val
    }
}
impl CostFunction for MleCost {
    type Param = f64;
    type Output = f64;

    /// Returns the negative log-likelihood of the candidate Jaccard index `j`:
    /// `-(dplus * ln(p+) + dless * ln(p-) + dequal * ln(1 - p+ - p-))` with
    /// `p+ = pb(u - v * j)` and `p- = pb(v - u * j)`.
    ///
    /// A category with a zero count contributes nothing, whatever its
    /// probability.
    fn cost(&self, j: &Self::Param) -> Result<Self::Output, argmin::core::Error> {
        // `0 * ln(0)` evaluates to NaN in floating point; a category that was
        // never observed must simply drop out of the likelihood.
        fn weighted_ln(count: f64, proba: f64) -> f64 {
            if count == 0. {
                0.
            } else {
                count * proba.ln()
            }
        }
        let pbplus = self.pb(self.u - self.v * j);
        let pbless = self.pb(self.v - self.u * j);
        let log_likelyhood = weighted_ln(self.dplus, pbplus)
            + weighted_ln(self.dless, pbless)
            + weighted_ln(self.dequal, 1. - pbplus - pbless);
        Ok(-log_likelyhood)
    }
}
/// Estimator working on pairs of sketches built with the same parameters:
/// cardinality estimates and a maximum-likelihood Jaccard index (see `get_mle`).
pub struct MleJaccard {
/// Logarithm base `b` of the sketches.
b: f64,
/// Expected number of registers in every sketch passed to the estimator.
m: u64,
/// Rate parameter `a` of the sketches.
a: f64,
/// Natural logarithm of `b`, computed once in `new`.
lnb: f64,
}
impl From<SetSketchParams> for MleJaccard {
fn from(params: SetSketchParams) -> Self {
MleJaccard::new(params.get_b(), params.get_m(), params.get_a())
}
}
impl MleJaccard {
pub fn new(b: f64, m: u64, a: f64) -> Self {
let lnb = (b - 1.).ln_1p(); MleJaccard { b, m, a, lnb }
}
pub fn get_cardinal_estimate<I>(&self, sketch: &[I]) -> f64
where
I: Integer + Bounded + ToPrimitive + FromPrimitive + Copy + Clone + Send + Sync,
[I]: ParallelSlice<I>,
{
assert_eq!(self.m, sketch.len() as u64);
let sumbk: f64 = sketch
.into_par_iter()
.map(|c| (-(*c).to_f64().unwrap() * (self.b - 1.).ln_1p()).exp())
.sum();
let cardinality: f64 = self.m as f64 * (1. - 1. / self.b) / (self.a * self.lnb * sumbk);
cardinality
}
pub fn get_mle<I>(&self, sketch1: &[I], sketch2: &[I]) -> Option<f64>
where
I: Integer + Bounded + ToPrimitive + FromPrimitive + Copy + Clone + Send + Sync,
[I]: ParallelSlice<I>,
{
assert_eq!(self.m, sketch1.len() as u64);
assert_eq!(self.m, sketch2.len() as u64);
let card1 = self.get_cardinal_estimate(sketch1);
let card2 = self.get_cardinal_estimate(sketch2);
log::info!("mle_jaccard card1 : {}, card2 : {}", card1, card2);
let u = card1 / (card1 + card2);
let v = card2 / (card1 + card2);
let mut dplus: u32 = 0;
let mut dless: u32 = 0;
let mut dequal: u32 = 0;
for i in 0..sketch1.len() {
if sketch1[i] > sketch2[i] {
dplus += 1;
} else if sketch1[i] < sketch2[i] {
dless += 1;
} else {
dequal += 1;
}
}
let b_inf = 0.;
let aux = card1 / card2;
log::debug!("get_cardinal_estimate : {}", aux);
let b_sup = aux.min(1. / aux);
log::info!(
"mle_jaccard interval : ({} , {}) , dequal: {}",
b_inf,
b_sup,
dequal
);
let jac = dequal as f64 / self.m as f64;
let solver = GoldenSectionSearch::new(b_inf, b_sup).unwrap();
let init_param = jac;
let cost = MleCost::new(dplus as f64, dless as f64, dequal as f64, u, v, self.b);
let exec =
Executor::new(cost, solver).configure(|state| state.param(init_param).max_iters(100));
#[cfg(feature = "slog")]
let exec = exec.add_observer(SlogLogger::term(), ObserverMode::Always);
let res = exec.run().unwrap();
log::info!("res : {:#}", res);
let state = res.state();
log::info!("state : {:#?}", state);
log::info!(
"best solution (J): {:#?}, cost : {:#?}",
state.best_param,
state.best_cost
);
log::info!("\n trying simple exploration : ");
struct Best {
jac: f64,
opt: f64,
}
let mut best = Best {
jac: 0.,
opt: f64::MAX,
};
for i in 0..5 {
let jac_pertu = jac * (1. + i as f64 / 200.);
let j_mle_pertu = cost.cost(&jac_pertu).unwrap();
if j_mle_pertu < best.opt {
best = Best {
jac: jac_pertu,
opt: j_mle_pertu,
};
}
log::info!(" j : {}, le : {} ", jac_pertu, j_mle_pertu);
}
for i in 0..5 {
let jac_pertu = jac * (1. - i as f64 / 200.);
let j_mle_pertu = cost.cost(&jac_pertu).unwrap();
if j_mle_pertu < best.opt {
best = Best {
jac: jac_pertu,
opt: j_mle_pertu,
};
}
log::info!(" j : {}, le : {} ", jac_pertu, j_mle_pertu);
}
log::info!("jmle : {}, opt : {}", best.jac, best.opt);
let _j_b1 = self.get_mle_approx_b1(sketch1, sketch2).unwrap();
state.best_param
}
fn get_mle_approx_b1<I>(&self, sketch1: &[I], sketch2: &[I]) -> Option<f64>
where
I: Integer + Bounded + ToPrimitive + FromPrimitive + Copy + Clone + Send + Sync,
[I]: ParallelSlice<I>,
{
assert_eq!(self.m, sketch1.len() as u64);
assert_eq!(self.m, sketch2.len() as u64);
let card1 = self.get_cardinal_estimate(sketch1);
let card2 = self.get_cardinal_estimate(sketch2);
log::info!("mle_jaccard card1 : {}, card2 : {}", card1, card2);
let u = card1 / (card1 + card2);
let v = card2 / (card1 + card2);
let mut dplus: u32 = 0;
let mut dless: u32 = 0;
let mut dequal: u32 = 0;
for i in 0..sketch1.len() {
if sketch1[i] > sketch2[i] {
dplus += 1;
} else if sketch1[i] < sketch2[i] {
dless += 1;
} else {
dequal += 1;
}
}
let dplus = dplus as f64;
let dless = dless as f64;
let dequal: f64 = dequal as f64;
let mut aux = u * u * (dless + dequal) - v * v * (dplus + dequal);
aux = aux * aux;
let mut j: f64 = u * u * (dless + dequal) + v * v * (dplus + dequal)
- (aux + 4. * dless * dplus * (u * v) * (u * v)).sqrt();
j /= 2. * u * v * self.m as f64;
log::info!(" j mle approx for b -> 1 : {:?}", j);
Some(j)
} }
#[cfg(test)]
mod tests {
    use super::*;
    use crate::jaccard::*;
    use fnv::FnvHasher;
    use rand::distr::Uniform;

    /// Sketcher used by every test: `u16` registers over `usize` items hashed with FNV.
    type TestSketcher = SetSketcher<u16, usize, FnvHasher>;

    #[allow(dead_code)]
    fn log_init_test() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    /// Returns default parameters with `m = nb_sketch` and an empty sketcher built from them.
    fn new_sketcher(nb_sketch: usize) -> (SetSketchParams, TestSketcher) {
        let mut params = SetSketchParams::default();
        params.set_m(nb_sketch);
        let sketcher = SetSketcher::new(params, BuildHasherDefault::<FnvHasher>::default());
        (params, sketcher)
    }

    /// Logs the estimate and asserts it is positive and below `jexact + 3 * sigma`,
    /// with `sigma = sqrt(jexact * (1 - jexact) / m)`.
    fn check_jaccard_estimate(jac: f64, jexact: f32, params: &SetSketchParams) {
        let sigma = (jexact * (1. - jexact) / params.get_m() as f32).sqrt();
        log::info!(
            " jaccard estimate {:.3e}, j exact : {:.3e} , sigma : {:.3e}",
            jac,
            jexact,
            sigma
        );
        assert!(jac > 0. && (jac as f32) < jexact + 3. * sigma);
    }

    /// Logs the Jaccard bounds for 50 evenly spaced values in `(0, 1]`; the
    /// only checks are the assertions inside `get_jaccard_bounds`.
    #[test]
    fn test_params_bounds() {
        log_init_test();
        let params = SetSketchParams::default();
        log::info!("params : {:?}", params);
        let nb_frac = 50;
        for j in 1..=nb_frac {
            let jac = (j as f64) / (nb_frac as f64);
            let (jinf, jsup) = params.get_jaccard_bounds(jac);
            let delta = 100. * (jsup - jinf) / jac;
            log::info!(
                "j = {}, jinf : {:.5e}, jsup = {:.5e}, delta% : {:.3}",
                jac,
                jinf,
                jsup,
                delta
            );
        }
    }

    /// Two ranges sharing 100 of 2000 distinct values.
    #[test]
    fn test_range_inter1_hll_fnv_f32() {
        log_init_test();
        let va: Vec<usize> = (0..1000).collect();
        let vb: Vec<usize> = (900..2000).collect();
        let jexact = 100_f32 / 2000_f32;
        let (params, mut sethasher) = new_sketcher(2000);
        // A sketching failure must fail the test, not end it silently.
        sethasher.sketch_slice(&va).expect("error in sketching va");
        let ska = sethasher.get_signature().clone();
        sethasher.reinit();
        sethasher.sketch_slice(&vb).expect("error in sketching vb");
        let skb = sethasher.get_signature();
        log::debug!("ska = {:?}", ska);
        log::debug!("skb = {:?}", skb);
        let jac = get_jaccard_index_estimate(&ska, skb).unwrap();
        check_jaccard_estimate(jac, jexact, &params);
    }

    /// Two larger ranges with a small overlap; also checks that the lower
    /// bound on registers has moved above zero after sketching the first set.
    #[test]
    fn test_range_inter2_hll_fnv_f32() {
        log_init_test();
        let vb_max = 20000;
        let vb_min = 10000;
        let va_min = 500;
        let va_max = 10100;
        let va: Vec<usize> = (va_min..va_max).collect();
        let vb: Vec<usize> = (vb_min..vb_max).collect();
        let jexact = (va_max - vb_min) as f32 / (vb_max - va_min) as f32;
        let (params, mut sethasher) = new_sketcher(4000);
        sethasher.sketch_slice(&va).expect("error in sketching va");
        let low_sketch = sethasher.get_low_sketch();
        log::info!("lowest sketch : {}", low_sketch);
        assert!(low_sketch > 0);
        let cardinal = sethasher.get_cardinal_stats();
        log::info!(
            "cardinal of set a : {:.3e} relative stddev : {:.3e}",
            cardinal.0,
            cardinal.1
        );
        let ska = sethasher.get_signature().clone();
        sethasher.reinit();
        sethasher.sketch_slice(&vb).expect("error in sketching vb");
        let skb = sethasher.get_signature();
        let cardinal = sethasher.get_cardinal_stats();
        log::info!(
            "cardinal of set b : {:.3e} relative stddev : {:.3e}",
            cardinal.0,
            cardinal.1
        );
        log::debug!("ska = {:?}", ska);
        log::debug!("skb = {:?}", skb);
        let jac = get_jaccard_index_estimate(&ska, skb).unwrap();
        check_jaccard_estimate(jac, jexact, &params);
    }

    /// Sketches 5000 draws from 200 possible values and logs the cardinality estimate.
    #[test]
    fn test_hll_card_with_repetition() {
        log_init_test();
        let vamax = 200;
        let nb_sketch = 5000;
        let (_params, mut sethasher) = new_sketcher(nb_sketch);
        let unif = Uniform::<usize>::new(0, vamax).unwrap();
        let mut rng = Xoshiro256PlusPlus::seed_from_u64(45679);
        for _ in 0..nb_sketch {
            sethasher.sketch(&unif.sample(&mut rng)).unwrap();
        }
        let cardinal = sethasher.get_cardinal_stats();
        log::info!(
            "cardinal of set b : {:.3e} relative stddev : {:.3e}",
            cardinal.0,
            cardinal.1
        );
    }

    /// Merges two sketches, checks the cardinality of the union and the
    /// Jaccard index between the merged sketch and the second one, then keeps
    /// sketching into the merged sketcher.
    #[test]
    fn test_merge_1() {
        log_init_test();
        let vbmax = 2000;
        let va: Vec<usize> = (0..1000).collect();
        let vb: Vec<usize> = (900..vbmax).collect();
        let union = 2000.;
        let nb_sketch = 4000;
        let (params, mut sethasher_a) = new_sketcher(nb_sketch);
        sethasher_a.sketch_slice(&va).expect("error in sketching va");
        let (_, mut sethasher_b) = new_sketcher(nb_sketch);
        sethasher_b.sketch_slice(&vb).expect("error in sketching vb");
        let res = sethasher_a.merge(&sethasher_b);
        assert!(res.is_ok());
        let (mean, std) = sethasher_a.get_cardinal_stats();
        log::info!("mean , std : {:.3e}, {:.3e}", mean, std);
        assert!((mean - union).abs() / union <= 2.0 * std);
        let jac =
            get_jaccard_index_estimate(sethasher_a.get_signature(), sethasher_b.get_signature())
                .unwrap();
        // After the merge, sketcher a holds the union, so J = |b| / |a ∪ b|.
        let jexact = vb.len() as f32 / vbmax as f32;
        check_jaccard_estimate(jac, jexact, &params);
        for i in 500..2500 {
            sethasher_a.sketch(&i).unwrap();
        }
        log::info!("after adding in merged sethasher_a");
        let (mean, std) = sethasher_a.get_cardinal_stats();
        log::info!("mean , relative std : {:.3e}, {:.3e}", mean, std);
    }

    /// Nearly disjoint sets: checks the raw estimate, then logs the Jaccard
    /// bounds and the maximum-likelihood estimate.
    #[test]
    fn test_mle_1() {
        log_init_test();
        let vbmax = 2000;
        let vbmin = 995;
        let vamax = 1000;
        assert!(vamax > vbmin);
        let va: Vec<usize> = (0..vamax).collect();
        let vb: Vec<usize> = (vbmin..vbmax).collect();
        let nb_sketch = 6000;
        let (params, mut sethasher_a) = new_sketcher(nb_sketch);
        sethasher_a.sketch_slice(&va).expect("error in sketching va");
        let (_, mut sethasher_b) = new_sketcher(nb_sketch);
        sethasher_b.sketch_slice(&vb).expect("error in sketching vb");
        log::info!("test_mle : vbmax : {}, nbsketch : {}", vbmax, nb_sketch);
        let jac =
            get_jaccard_index_estimate(sethasher_a.get_signature(), sethasher_b.get_signature())
                .unwrap();
        let jexact = (vamax - vbmin) as f32 / vbmax as f32;
        check_jaccard_estimate(jac, jexact, &params);
        let mle_jaccard = MleJaccard::from(params);
        let bounds = params.get_jaccard_bounds(jac);
        log::info!("bounds for jaccard estimate : {:#?}", bounds);
        let j = mle_jaccard.get_mle(sethasher_a.get_signature(), sethasher_b.get_signature());
        log::info!("j mle : {}", j.expect("mle estimation failed"));
    }
}