nym_inclusion_probability/
lib.rs

1//! Active set inclusion probability simulator
2
3use std::time::{Duration, Instant};
4
5use error::Error;
6use rand::Rng;
7
8mod error;
9
// Convergence thresholds for the Monte Carlo simulation: the loop may stop early once
// the per-sample change of the (unnormalised) active-set counts, divided by the number
// of samples drawn so far, falls below BOTH of these (see the convergence check in
// `simulate_selection_probability_mixnodes`).
const TOLERANCE_L2_NORM: f64 = 1e-4;
const TOLERANCE_MAX_NORM: f64 = 1e-4;
12
/// Result of simulating the stake-weighted active/reserve set selection.
///
/// The probability vectors are indexed exactly like the input stake list passed to
/// [`simulate_selection_probability_mixnodes`].
#[derive(Debug, Clone, PartialEq)]
pub struct SelectionProbability {
    /// Estimated probability, per node index, of ending up in the active set.
    pub active_set_probability: Vec<f64>,
    /// Estimated probability, per node index, of ending up in the reserve (standby) set.
    pub reserve_set_probability: Vec<f64>,
    /// Number of Monte Carlo samples actually drawn (0 when the trivial early-return
    /// path was taken).
    pub samples: u64,
    /// Wall-clock time spent sampling.
    pub time: Duration,
    /// Final per-sample change of the active-set counts in l2-norm (convergence measure).
    pub delta_l2: f64,
    /// Final per-sample change of the active-set counts in max-norm (convergence measure).
    pub delta_max: f64,
}
21
/// Simulate the probability of each mixnode being included in the active set or the
/// reserve (standby) set, given each node's total stake.
///
/// Each sample draws nodes without replacement, weighted by stake: first
/// `active_set_size` nodes for the active set, then `reserve_set_size` nodes for the
/// reserve set. Sampling repeats until the estimates stop changing (both deltas fall
/// below [`TOLERANCE_L2_NORM`] and [`TOLERANCE_MAX_NORM`]), or until `max_samples` or
/// `max_time` is exhausted.
///
/// Returns per-node probabilities (indexed like `list_stake_for_mixnodes`) together
/// with the number of samples drawn, the elapsed time, and the final convergence
/// deltas.
///
/// # Errors
///
/// Propagates sampling errors (e.g. [`Error::EmptyListCumulStake`]) and returns
/// [`Error::ResultsShorterThanInput`] if the output vectors do not match the input
/// length.
pub fn simulate_selection_probability_mixnodes<R>(
    list_stake_for_mixnodes: &[u128],
    active_set_size: usize,
    reserve_set_size: usize,
    max_samples: u64,
    max_time: Duration,
    rng: &mut R,
) -> Result<SelectionProbability, Error>
where
    R: Rng + ?Sized,
{
    log::trace!("Simulating mixnode active set selection probability");

    // If the active set is at least as large as the number of bonded mixnodes, every
    // node is guaranteed a spot (100%) and no simulation is needed.
    if list_stake_for_mixnodes.len() <= active_set_size {
        return Ok(SelectionProbability {
            active_set_probability: vec![1.0; list_stake_for_mixnodes.len()],
            reserve_set_probability: vec![0.0; list_stake_for_mixnodes.len()],
            samples: 0,
            time: Duration::ZERO,
            delta_l2: 0.0,
            delta_max: 0.0,
        });
    }

    // Total number of existing (registered) nodes
    let num_mixnodes = list_stake_for_mixnodes.len();

    // Cumulative stake ordered by node index
    let list_cumul = cumul_sum(list_stake_for_mixnodes);

    // Selection counts while sampling; normalised into probabilities at the end.
    let mut active_set_probability = vec![0.0; num_mixnodes];
    let mut reserve_set_probability = vec![0.0; num_mixnodes];

    // Number sufficiently large to have a good approximation of selection probability
    let mut samples = 0;
    let mut delta_l2;
    let mut delta_max;

    // Make sure we bound the time we allow it to run
    let start_time = Instant::now();

    loop {
        samples += 1;
        let mut sample_active_mixnodes = Vec::new();
        let mut sample_reserve_mixnodes = Vec::new();
        // Working copy: each pick "removes" the node's stake so the next pick is drawn
        // without replacement.
        let mut list_cumul_temp = list_cumul.clone();

        let active_set_probability_previous = active_set_probability.clone();

        // Select the active nodes for the epoch (hour)
        while sample_active_mixnodes.len() < active_set_size
            && sample_active_mixnodes.len() < list_cumul_temp.len()
        {
            let candidate = sample_candidate(&list_cumul_temp, rng)?;

            if !sample_active_mixnodes.contains(&candidate) {
                sample_active_mixnodes.push(candidate);
                remove_mixnode_from_cumul_stake(candidate, &mut list_cumul_temp);
            }
        }

        // Select the reserve nodes for the epoch (hour); the length condition caps the
        // reserve set at however many nodes remain after filling the active set.
        while sample_reserve_mixnodes.len() < reserve_set_size
            && sample_reserve_mixnodes.len() + sample_active_mixnodes.len() < list_cumul_temp.len()
        {
            let candidate = sample_candidate(&list_cumul_temp, rng)?;

            if !sample_reserve_mixnodes.contains(&candidate)
                && !sample_active_mixnodes.contains(&candidate)
            {
                sample_reserve_mixnodes.push(candidate);
                remove_mixnode_from_cumul_stake(candidate, &mut list_cumul_temp);
            }
        }

        // Sum up nodes being in active or reserve set
        for active_mixnodes in sample_active_mixnodes {
            active_set_probability[active_mixnodes] += 1.0;
        }
        for reserve_mixnodes in sample_reserve_mixnodes {
            reserve_set_probability[reserve_mixnodes] += 1.0;
        }

        // Convergence criteria only on active set.
        // We divide by samples to get the average; that is not really part of the delta
        // computation.
        //
        // NOTE(review): every sample increments exactly `active_set_size` counters by
        // 1.0, so these deltas evaluate to `sqrt(active_set_size) / samples` and
        // `1 / samples` respectively — the tolerance check therefore behaves like a
        // fixed minimum sample count (the tests rely on this determinism).
        delta_l2 =
            l2_diff(&active_set_probability, &active_set_probability_previous)? / (samples as f64);
        delta_max =
            max_diff(&active_set_probability, &active_set_probability_previous)? / (samples as f64);
        // Precedence: (warmed up && both deltas converged) || sample budget exhausted.
        if samples > 10 && delta_l2 < TOLERANCE_L2_NORM && delta_max < TOLERANCE_MAX_NORM
            || samples >= max_samples
        {
            break;
        }

        // Stop if we run out of time
        if start_time.elapsed() > max_time {
            log::debug!("Simulation ran out of time, stopping");
            break;
        }
    }

    // Divide occurrences with the number of samples once we're done to get the probabilities.
    active_set_probability
        .iter_mut()
        .for_each(|x| *x /= samples as f64);
    reserve_set_probability
        .iter_mut()
        .for_each(|x| *x /= samples as f64);

    // Some sanity checks of the output
    if active_set_probability.len() != num_mixnodes || reserve_set_probability.len() != num_mixnodes
    {
        return Err(Error::ResultsShorterThanInput);
    }

    Ok(SelectionProbability {
        active_set_probability,
        reserve_set_probability,
        samples,
        time: start_time.elapsed(),
        delta_l2,
        delta_max,
    })
}
151
// Running (cumulative) sum of the given stakes, in input order.
fn cumul_sum<'a>(list: impl IntoIterator<Item = &'a u128>) -> Vec<u128> {
    list.into_iter()
        .scan(0u128, |running, stake| {
            *running += stake;
            Some(*running)
        })
        .collect()
}
162
163fn sample_candidate<R>(list_cumul: &[u128], rng: &mut R) -> Result<usize, Error>
164where
165    R: Rng + ?Sized,
166{
167    use rand::distributions::{Distribution, Uniform};
168    let uniform = Uniform::from(0..*list_cumul.last().ok_or(Error::EmptyListCumulStake)?);
169    let r = uniform.sample(rng);
170
171    let candidate = list_cumul
172        .iter()
173        .enumerate()
174        .find(|(_, x)| *x >= &r)
175        .ok_or(Error::SamplePointOutOfBounds)?
176        .0;
177
178    Ok(candidate)
179}
180
// Update the cumulative stake list as if the picked node's stake were removed: every
// cumulative entry from the candidate onwards shrinks by the candidate's own stake.
// The list keeps its length; the candidate simply no longer contributes any weight.
fn remove_mixnode_from_cumul_stake(candidate: usize, list_cumul_stake: &mut [u128]) {
    // Cumulative stake just before the candidate (0 when the candidate is first).
    let preceding = match candidate.checked_sub(1) {
        Some(prev) => list_cumul_stake[prev],
        None => 0,
    };
    let candidate_stake = list_cumul_stake[candidate] - preceding;

    for entry in &mut list_cumul_stake[candidate..] {
        *entry -= candidate_stake;
    }
}
193
194// Compute the difference in l2-norm
195fn l2_diff(v1: &[f64], v2: &[f64]) -> Result<f64, Error> {
196    if v1.len() != v2.len() {
197        return Err(Error::NormDifferenceSizeArrays);
198    }
199    Ok(v1
200        .iter()
201        .zip(v2)
202        .map(|(&i1, &i2)| (i1 - i2).powi(2))
203        .sum::<f64>()
204        .sqrt())
205}
206
207// Compute the difference in max-norm
208fn max_diff(v1: &[f64], v2: &[f64]) -> Result<f64, Error> {
209    if v1.len() != v2.len() {
210        return Err(Error::NormDifferenceSizeArrays);
211    }
212    Ok(v1
213        .iter()
214        .zip(v2)
215        .map(|(x, y)| (x - y).abs())
216        .fold(f64::NEG_INFINITY, f64::max))
217}
218
#[cfg(test)]
mod tests {
    use rand::{SeedableRng, rngs::StdRng};

    use super::*;

    // Fixed-seed RNG so the simulation tests are fully deterministic.
    fn test_rng() -> StdRng {
        StdRng::seed_from_u64(42)
    }

    #[test]
    fn compute_cumul_sum() {
        let v = cumul_sum(&vec![1, 2, 3]);
        assert_eq!(v, &[1, 3, 6]);
    }

    #[test]
    fn remove_mixnode_from_cumul() {
        let mut cumul_stake = vec![1, 2, 3, 4, 5, 6];
        remove_mixnode_from_cumul_stake(3, &mut cumul_stake);
        assert_eq!(cumul_stake, &[1, 2, 3, 3, 4, 5]);
    }

    #[test]
    fn max_norm() {
        let v1 = vec![1.0, 2.0, 3.0];
        let v2 = vec![2.0, 4.0, -6.0];
        assert!((max_diff(&v1, &v2).unwrap() - 9.0).abs() < f64::EPSILON);
    }

    #[test]
    fn ls_norm() {
        let v1 = vec![1.0, 2.0, 3.0];
        let v2 = vec![2.0, 3.0, -2.0];
        assert!((l2_diff(&v1, &v2).unwrap() - 5.196_152_422_706_632).abs() < 1e2 * f64::EPSILON);
    }

    // Replicate the results from the Python simulation code in https://github.com/nymtech/team-core/issues/114
    #[test]
    fn replicate_python_simulation() {
        let active_set_size = 4;
        let standby_set_size = 1;

        // this has to contain the total stake per node
        let list_mix = vec![
            100, 100, 3000, 500_000, 100, 10, 10, 10, 10, 10, 30000, 500, 200, 52345,
        ];

        let max_samples = 100_000;
        let max_time = Duration::from_secs(10);
        let mut rng = test_rng();

        let SelectionProbability {
            active_set_probability,
            reserve_set_probability,
            samples,
            time,
            delta_l2,
            delta_max,
        } = simulate_selection_probability_mixnodes(
            &list_mix,
            active_set_size,
            standby_set_size,
            max_samples,
            max_time,
            &mut rng,
        )
        .unwrap();

        // Check that any possible test failure wasn't because we ran it on 1970s hardware, and the
        // sampling aborted prematurely due to hitting `max_time`.
        assert!(time < max_time);

        // These values come from running the python simulator for a very long time
        let expected_active_set_probability = vec![
            0.025_070_8,
            0.025_073_2,
            0.744_117,
            0.999_999,
            0.025_000_2,
            0.002_524_4,
            0.002_527_8,
            0.002_528_6,
            0.002_569_6,
            0.002_513_6,
            0.994,
            0.125_482_8,
            0.050_279_8,
            0.998_313_2,
        ];
        // The same check is used in the convergence criterion, and hence should be reflected in
        // `delta_max` too.
        assert!(
            max_diff(&active_set_probability, &expected_active_set_probability).unwrap() < 1e-2
        );

        let expected_reserve_set_probability = vec![
            0.076_392_4,
            0.076_499,
            0.204_893_6,
            1e-06,
            0.076_278_8,
            0.007_720_6,
            0.007_673,
            0.007_700_2,
            0.007_669_4,
            0.007_731_2,
            0.005_789_4,
            0.368_465_6,
            0.151_537_2,
            0.001_648_6,
        ];
        assert!(
            max_diff(&reserve_set_probability, &expected_reserve_set_probability).unwrap() < 1e-2
        );

        // The convergence criterion is deterministic: each sample changes the active-set
        // counts by exactly sqrt(active_set_size) in l2-norm, so the loop always breaks
        // at sqrt(4) / TOLERANCE_L2_NORM + 1 = 20_001 samples.
        assert_eq!(samples, 20_001);
        assert!(delta_l2 < TOLERANCE_L2_NORM);
        assert!(delta_max < TOLERANCE_MAX_NORM);
    }

    #[test]
    fn fewer_nodes_than_active_set_size() {
        let active_set_size = 10;
        let standby_set_size = 3;
        let list_mix = vec![100, 100, 3000];
        let max_samples = 100_000;
        let max_time = Duration::from_secs(10);
        let mut rng = test_rng();

        let SelectionProbability {
            active_set_probability,
            reserve_set_probability,
            samples,
            time: _,
            delta_l2,
            delta_max,
        } = simulate_selection_probability_mixnodes(
            &list_mix,
            active_set_size,
            standby_set_size,
            max_samples,
            max_time,
            &mut rng,
        )
        .unwrap();

        // With fewer nodes than active set slots, every node is in the active set with
        // certainty and the reserve set is empty.
        let expected_active_set_probability = vec![1.0, 1.0, 1.0];
        let expected_reserve_set_probability = vec![0.0, 0.0, 0.0];
        assert!(
            max_diff(&active_set_probability, &expected_active_set_probability).unwrap()
                < 1e1 * f64::EPSILON
        );
        assert!(
            max_diff(&reserve_set_probability, &expected_reserve_set_probability).unwrap()
                < 1e1 * f64::EPSILON
        );

        // The early-return path performs no sampling at all.
        assert_eq!(samples, 0);
        assert!(delta_l2 < f64::EPSILON);
        assert!(delta_max < f64::EPSILON);
    }

    #[test]
    fn fewer_nodes_than_reward_set_size() {
        let active_set_size = 4;
        let standby_set_size = 3;
        let list_mix = vec![100, 100, 3000, 342, 3_498_234];
        let max_samples = 100_000_000;
        let max_time = Duration::from_secs(10);
        let mut rng = test_rng();

        let SelectionProbability {
            active_set_probability,
            reserve_set_probability,
            samples,
            time: _,
            delta_l2,
            delta_max,
        } = simulate_selection_probability_mixnodes(
            &list_mix,
            active_set_size,
            standby_set_size,
            max_samples,
            max_time,
            &mut rng,
        )
        .unwrap();

        // These values come from running the python simulator for a very long time
        let expected_active_set_probability = vec![0.546, 0.538, 0.999, 0.915, 1.0];
        let expected_reserve_set_probability = vec![0.453, 0.461, 0.0005, 0.084, 0.0];
        assert!(
            max_diff(&active_set_probability, &expected_active_set_probability).unwrap() < 1e-2,
        );
        assert!(
            max_diff(&reserve_set_probability, &expected_reserve_set_probability).unwrap() < 1e-2,
        );

        // Deterministic break point of the convergence criterion (see
        // `replicate_python_simulation` for the derivation).
        assert_eq!(samples, 20_001);
        assert!(delta_l2 < TOLERANCE_L2_NORM);
        assert!(delta_max < TOLERANCE_MAX_NORM);
    }
}