import sys
from tqdm.auto import trange
def compute_tau(x, m):
summation = 0
k = 1
x = (1 - x / m)
while True:
term = (1 - x**(2**(-k)))**2 * 2**(-k)
summation += term
if term < sys.float_info.epsilon:
break
k += 1
return m / 3.0 * (1 - x - summation)
def compute_sigma(x, m):
summation = 0
k = 1
x = x / m
try:
while True:
term = x**(2**k) * 2**(k - 1)
summation += term
if term < sys.float_info.epsilon:
break
k += 1
except OverflowError:
return float("inf")
return m * (x + summation)
def write_out_tau():
with open("src/tau_multiplicities_constants.rs", "w", encoding="utf8") as file:
file.write(
r"""//! This file is generated by the script multiplities_constants_precomputation.py
//! and contains the constants used in the computation of the cardinalities
//! in the HyperLogLog algorithm that employs the multiplicities of the registers.
//! The paper that describes the algorithm is the following
//! [New cardinality estimation algorithms for HyperLogLog sketches](https://oertl.github.io/hyperloglog-sketch-estimation-paper/paper/paper.pdf).
//! The constants are computed for all precisions between 4 and 18.
pub const TAU: [&[f32]; 15] = [
""")
for precision in trange(4, 19, desc="Precision", leave=False):
file.write(f" // precision {precision}\n")
file.write(" &[")
for multeplicity in range(0, 2**precision + 1):
tau = compute_tau(multeplicity, 2**precision)
tau = round(tau, 3)
if tau > 10000:
tau = round(tau, 2)
file.write(
f"\n {tau},")
file.write("\n ],\n")
file.write("];\n")
def write_out_sigma():
with open("src/sigma_multiplicities_constants.rs", "w", encoding="utf8") as file:
file.write(
r"""//! This file is generated by the script multiplities_constants_precomputation.py
//! and contains the constants used in the computation of the cardinalities
//! in the HyperLogLog algorithm that employs the multiplicities of the registers.
//! The paper that describes the algorithm is the following
//! [New cardinality estimation algorithms for HyperLogLog sketches](https://oertl.github.io/hyperloglog-sketch-estimation-paper/paper/paper.pdf).
//! The constants are computed for all precisions between 4 and 18.
pub const SIGMA: [&[f32]; 15] = [
""")
for precision in trange(4, 19, desc="Precision", leave=False):
file.write(f" // precision {precision}\n")
file.write(" &[")
for multeplicity in range(0, 2**precision + 1):
sigma = compute_sigma(multeplicity, 2**precision)
sigma = round(sigma, 6)
if sigma > 10:
sigma = round(sigma, 5)
if sigma > 100:
sigma = round(sigma, 4)
if sigma > 1000:
sigma = round(sigma, 3)
if sigma > 10000:
sigma = round(sigma, 2)
if sigma == 0.434294 or sigma == 0.4342:
sigma = "core::f32::consts::LOG10_E"
if sigma == 0.785398:
sigma = "core::f32::consts::FRAC_PI_4"
if sigma == 0.31831 or sigma == 0.318:
sigma = "core::f32::consts::FRAC_1_PI"
if sigma == float("inf"):
sigma = "core::f32::INFINITY"
file.write(
f"\n {sigma},")
file.write("\n ],\n")
file.write("];\n")
if __name__ == "__main__":
write_out_tau()
write_out_sigma()