hamming_dict/
lib.rs

1#![no_std]
2
3#[cfg(feature = "alloc")]
4extern crate alloc;
5
6#[cfg(feature = "alloc")]
7use alloc::{vec, vec::Vec};
8use bitarray::BitArray;
9use rand::{Rng, SeedableRng};
10use rand_xoshiro::Xoshiro256PlusPlus;
11
12/// Generate a dictionary with a specific number of `words` as [`BitArray`] with `B` bytes.
13///
14/// This is deterministic, and thus will always produce the same dictionary given the same
15/// `B` and `words` parameters.
16///
17/// Returns the codewords in the dictionary as a [`Vec`].
18#[cfg(feature = "alloc")]
19pub fn generate_dict<const B: usize>(words: usize) -> Vec<BitArray<B>> {
20    let mut dict = vec![BitArray::zeros(); words];
21    generate_dict_rand(&mut dict);
22    dict
23}
24
25/// Generate a dictionary with a specific number of `words` as [`BitArray`] with `B` bytes.
26///
27/// This is deterministic, and thus will always produce the same dictionary given the same
28/// `B`, `words`, and `seed` parameters.
29///
30/// Returns the codewords in the dictionary as a [`Vec`].
31#[cfg(feature = "alloc")]
32pub fn generate_dict_seed<const B: usize>(words: usize, seed: u64) -> Vec<BitArray<B>> {
33    let mut dict = vec![BitArray::zeros(); words];
34    generate_dict_rand_seed(&mut dict, seed);
35    dict
36}
37
38/// Generate a dictionary by mutating a slice of [`BitArray`].
39///
40/// This is deterministic, and thus will always produce the same dictionary given the same
41/// input slice of [`BitArray`] (`dict`).
42///
43/// It is recommended to pass random bytes as input, or this may take an extraordinarily
44/// long time to finish.
45///
46/// Useful if you want to use your own seed to produce unique codewords.
47pub fn generate_dict_from<const B: usize>(dict: &mut [BitArray<B>]) {
48    match dict.len() {
49        0 => return,
50        1 => return,
51        2..=64 => {
52            generate_dict_stage_2(dict);
53            return;
54        }
55        len => {
56            let (left, right) = dict.split_at_mut(len / 2);
57            generate_dict_from(left);
58            generate_dict_from(right);
59        }
60    }
61    generate_dict_stage_2(dict);
62}
63
64/// Generate a deterministic dictionary by mutating a slice of [`BitArray`].
65///
66/// The actual values of the input slice are ignored, as they are replaced with random values.
67/// This is deterministic based on the number of words in the dictionary (length of `dict`) only.
68///
69/// Internally this uses [`Xoshiro256PlusPlus`], calling [`SeedableRng::seed_from_u64`] with `0` as seed.
70pub fn generate_dict_rand<const B: usize>(dict: &mut [BitArray<B>]) {
71    generate_dict_rand_seed(dict, 0);
72}
73
74/// Generate a deterministic dictionary by mutating a slice of [`BitArray`].
75///
76/// The actual values of the input slice are ignored, as they are replaced with random values.
77/// This is deterministic based on the number of words in the dictionary (length of `dict`) only.
78///
79/// Internally this uses [`Xoshiro256PlusPlus`], calling [`SeedableRng::seed_from_u64`] with `seed` as seed.
80pub fn generate_dict_rand_seed<const B: usize>(dict: &mut [BitArray<B>], seed: u64) {
81    let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed);
82    for word in &mut *dict {
83        for byte in &mut **word {
84            *byte = rng.gen();
85        }
86    }
87    generate_dict_from(dict);
88}
89
90fn generate_dict_stage_2<const B: usize>(dict: &mut [BitArray<B>]) {
91    let mut changed = true;
92    while changed {
93        changed = false;
94        for ix in 0..dict.len() {
95            'outer: loop {
96                // Immutable reference to item.
97                let word = &dict[ix];
98                // Compute the current sum of all distances from all words in the dictionary from this word.
99                let old_closest_distance = closest_distance_sum_distances(dict, word, ix);
100
101                // Go through every one bit mutation of the word.
102                for byte in 0..B {
103                    for bit in 0..8 {
104                        // Mutate the word by one bit.
105                        let mut new_word = *word;
106                        new_word[byte] ^= 1 << bit;
107
108                        // Check its new distance to all other words.
109                        // Subtract `1` because the word now has `1` extra distance from
110                        // itself, which shouldn't be counted.
111                        let closest_distance = closest_distance_sum_distances(dict, &new_word, ix);
112
113                        // Check if this distance is further from all bitstrings than the previous best.
114                        if closest_distance > old_closest_distance {
115                            // It was, so set this as the new word.
116                            dict[ix] = new_word;
117                            changed = true;
118                            continue 'outer;
119                        }
120                    }
121                }
122
123                break;
124            }
125        }
126    }
127}
128
129fn closest_distance_sum_distances<const B: usize>(
130    dict: &[BitArray<B>],
131    word: &BitArray<B>,
132    ignore: usize,
133) -> (u32, u64) {
134    let mut closest_distance = u32::MAX;
135    let mut sum_distances = 0;
136    for distance in dict
137        .iter()
138        .enumerate()
139        .filter(|&(ix, _)| ix != ignore)
140        .map(|(_, other_word)| other_word.distance(word))
141    {
142        sum_distances += distance as u64;
143        if distance < closest_distance {
144            closest_distance = distance;
145        }
146    }
147    (closest_distance, sum_distances)
148}