1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// (c) Copyright 2025 Helsing GmbH. All rights reserved.
use rand::Rng as _;
use rand_distr::Distribution;
use std::{
collections::BTreeSet,
fs::File,
io::{BufRead, BufReader, Write},
ops::Bound,
path::Path,
};
/// Increment to indicate that random_dots.rs generated by an older build.rs must be re-generated.
const GEN: usize = 1;
fn main() {
println!("cargo:rustc-link-arg-benches=-rdynamic");
println!("cargo:rerun-if-changed=build.rs");
let gencheck = format!("// gen-version={GEN}");
// generate sets of random Dots to use in benchmarks.
// we generate this file rather than committing it so that the logic for generating it is
// checked in somewhere and documented, which also means we can change it if we wish. the
// variability of generated dots themselves should (hopefully) not influence the measurements
// too much. and even if they do, that's also a valuable signal.
let out_dir = std::env::var_os("OUT_DIR").unwrap();
let dest_path = Path::new(&out_dir).join("random_dots.rs");
// don't re-generate if it already exists to ensure benchmarks
// of re-builds (e.g., tango) use the same data sets.
// but _do_ generate if build.rs has changed its code version.
if dest_path.exists() {
if BufReader::new(File::open(&dest_path).unwrap())
.lines()
.next()
.unwrap()
.unwrap()
== gencheck
{
eprintln!("existing random_dots.rs is new enough");
return;
} else {
eprintln!("existing random_dots.rs is outdated");
}
} else {
eprintln!("no existing random_dots.rs, so generating");
}
let mut out = File::create(dest_path).unwrap();
writeln!(out, "{gencheck}").unwrap();
let mut rng = rand::rng();
// NOTE: 2.2 is arbitrarily chosen here as a "quite skewed" distribution, meaning
// we'll get nearly all dots near the beginning of the sequence (i.e., consecutive dots),
// and only a few sparse dots in the tail (i.e., in the dot cloud).
let zipf = rand_distr::Zipf::new(TOP as f64, 2.2).unwrap();
const TOP: usize = 8192;
const IDS: usize = 8;
for i in [1, 2] {
let mut all = BTreeSet::new();
for id in 1..=IDS {
// higher ids have fewer holes because we sample more
for _ in 0..(id * 4 * TOP) {
let sample = zipf.sample(&mut rng) as usize;
all.insert((id, sample));
}
}
writeln!(out, "#[allow(dead_code)]").unwrap();
writeln!(out, "pub(crate) const BIG{i}: &[Dot] = &[").unwrap();
for &(id, seq) in &all {
writeln!(out, "Dot::mint(Identifier::new({id}, 0), {seq}),").unwrap();
}
writeln!(out, "];").unwrap();
let mut in_small = BTreeSet::new();
// small only has new dots from a subset of the IDs, as that's more common
// (this goes to 3, not IDS)
for id in 1..=3 {
// one sender has a fair number of new dots, one has some, one has few
let adj = match id {
1 => 1.0,
2 => 0.5,
3 => 0.1,
_ => unreachable!(),
};
let max = all
.range((Bound::Included((id, 1)), Bound::Included((id, usize::MAX))))
.next_back()
.map(|(_, seq)| seq)
.copied()
.unwrap_or(1);
for seq in 1..=TOP {
if all.contains(&(id, seq)) {
// small should contain a few dots (~5) that are in BIG
if rng.random_bool(adj * 10.0 / TOP as f64) {
in_small.insert((id, seq));
}
} else {
// some that fill holes (preferring early ones)
// NOTE: the * 2.0 in there is because we're iterating through all the
// seqs, so the last value will average to 0.5, which would otherwise
// effectively halve the n/TOP value.
if seq < max {
if rng.random_bool(
adj * (20.0 / TOP as f64) * 2.0 * (1.0 - seq as f64 / max as f64),
) {
in_small.insert((id, seq));
}
} else {
// and a small amount that extend the max
// biased towards those close to max
if rng.random_bool(
adj * (5.0 / TOP as f64)
* 2.0
* (1.0 - (seq - max) as f64 / (TOP - max) as f64),
) {
in_small.insert((id, seq));
}
}
}
}
}
writeln!(out, "#[allow(dead_code)]").unwrap();
writeln!(out, "pub(crate) const SMALL{i}: &[Dot] = &[").unwrap();
for &(id, seq) in &in_small {
writeln!(out, "Dot::mint(Identifier::new({id}, 0), {seq}),").unwrap();
}
writeln!(out, "];").unwrap();
}
out.flush().unwrap();
}