embeddenator_testkit/
generators.rs1use embeddenator_vsa::SparseVec;
10use rand::Rng;
11use std::collections::HashSet;
12
13pub fn random_sparse_vec(rng: &mut impl Rng, dims: usize, sparsity: usize) -> SparseVec {
28 let mut used: HashSet<usize> = HashSet::with_capacity(sparsity.saturating_mul(2));
29 let mut pos = Vec::with_capacity(sparsity / 2);
30 let mut neg = Vec::with_capacity(sparsity / 2);
31
32 let target_each = sparsity / 2;
34 while pos.len() < target_each {
35 let idx = rng.random_range(0..dims);
36 if used.insert(idx) {
37 pos.push(idx);
38 }
39 }
40 while neg.len() < target_each {
41 let idx = rng.random_range(0..dims);
42 if used.insert(idx) {
43 neg.push(idx);
44 }
45 }
46
47 pos.sort_unstable();
48 neg.sort_unstable();
49 SparseVec { pos, neg }
50}
51
52pub fn mk_random_sparsevec(rng: &mut impl Rng, dims: usize, sparsity: usize) -> SparseVec {
54 random_sparse_vec(rng, dims, sparsity)
55}
56
57pub fn deterministic_sparse_vec(dim: usize, nnz: usize, seed: u64) -> SparseVec {
72 let pos_count = nnz / 2;
74 let neg_count = nnz - pos_count;
75
76 let mut state = seed;
77 let lcg = |s: &mut u64| -> u64 {
78 *s = s.wrapping_mul(6364136223846793005).wrapping_add(1);
79 *s
80 };
81
82 let mut pos = Vec::with_capacity(pos_count);
83 let mut neg = Vec::with_capacity(neg_count);
84 let mut used = HashSet::new();
85
86 for _ in 0..pos_count {
87 loop {
88 let idx = (lcg(&mut state) as usize) % dim;
89 if used.insert(idx) {
90 pos.push(idx);
91 break;
92 }
93 }
94 }
95
96 for _ in 0..neg_count {
97 loop {
98 let idx = (lcg(&mut state) as usize) % dim;
99 if used.insert(idx) {
100 neg.push(idx);
101 break;
102 }
103 }
104 }
105
106 pos.sort_unstable();
107 neg.sort_unstable();
108
109 SparseVec { pos, neg }
110}
111
/// Counts matching elements of `a` and `b` with a single forward merge pass.
///
/// Both slices are walked from the front: whichever side currently holds the smaller
/// value is advanced, and a pair of equal values advances both sides and adds one to
/// the count. For ascending slices without duplicates this is the intersection size.
fn intersection_count_sorted(a: &[usize], b: &[usize]) -> usize {
    let mut left = a;
    let mut right = b;
    let mut shared = 0;

    while let (Some((x, left_rest)), Some((y, right_rest))) =
        (left.split_first(), right.split_first())
    {
        if x < y {
            left = left_rest;
        } else if x > y {
            right = right_rest;
        } else {
            shared += 1;
            left = left_rest;
            right = right_rest;
        }
    }

    shared
}
130
131pub fn sparse_dot(a: &SparseVec, b: &SparseVec) -> i32 {
142 let pp = intersection_count_sorted(&a.pos, &b.pos) as i32;
143 let nn = intersection_count_sorted(&a.neg, &b.neg) as i32;
144 let pn = intersection_count_sorted(&a.pos, &b.neg) as i32;
145 let np = intersection_count_sorted(&a.neg, &b.pos) as i32;
146 (pp + nn) - (pn + np)
147}
148
/// Produces `size` pseudo-random bytes that depend only on `seed`.
///
/// A 64-bit linear congruential generator is stepped once per output byte, and the most
/// significant byte of the new state (`state >> 56`) is emitted. The unmodified seed
/// itself is never emitted.
pub fn generate_noise_pattern(size: usize, seed: u64) -> Vec<u8> {
    let mut lcg = seed;
    (0..size)
        .map(|_| {
            lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
            (lcg >> 56) as u8
        })
        .collect()
}
162
/// Produces a `width * height` byte buffer holding a diagonal gradient.
///
/// Bytes are emitted row by row (`y` outer, `x` inner). The byte at column `x`, row `y`
/// is `(x + y) * 255 / (width + height)` using integer division, truncated to `u8`.
/// When either dimension is zero the result is empty.
pub fn generate_gradient_pattern(width: usize, height: usize) -> Vec<u8> {
    let mut pixels = Vec::with_capacity(width * height);
    pixels.extend((0..height).flat_map(|row| {
        // The divisor is only evaluated for an actual pixel, so it is never zero here.
        (0..width).map(move |col| (((col + row) * 255) / (width + height)) as u8)
    }));
    pixels
}
175
/// Produces exactly `size` bytes of synthetic binary-looking data.
///
/// When `size >= 16` the output starts with a 16-byte header: `0x7f 'E' 'L' 'F'`,
/// then `2, 1, 1, 0`, then eight zero bytes. Every following byte is chosen by its
/// absolute offset, in 256-byte bands that repeat every 1024 bytes:
///
/// * band 0: `0x90`
/// * band 1: the low byte of the offset
/// * band 2: `0x00`
/// * band 3: `0xCC`
///
/// For `size < 16` no header is written and the band pattern starts at offset 0.
pub fn generate_binary_blob(size: usize) -> Vec<u8> {
    const HEADER: [u8; 16] = [0x7f, b'E', b'L', b'F', 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0];

    let mut blob = Vec::with_capacity(size);
    if size >= HEADER.len() {
        blob.extend_from_slice(&HEADER);
    }

    // Fill from the end of the header (or from 0) up to `size`; the buffer therefore
    // ends with exactly `size` bytes and needs no trimming afterwards.
    let start = blob.len();
    blob.extend((start..size).map(|offset| match (offset / 256) % 4 {
        0 => 0x90,
        1 => (offset & 0xFF) as u8,
        2 => 0x00,
        _ => 0xCC,
    }));

    blob
}
203
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;

    /// True when `xs` is strictly increasing, i.e. sorted with no duplicates.
    fn strictly_increasing(xs: &[usize]) -> bool {
        xs.windows(2).all(|w| w[0] < w[1])
    }

    /// True when no index occurs in both `pos` and `neg`.
    fn signs_disjoint(v: &SparseVec) -> bool {
        let pos_set: HashSet<_> = v.pos.iter().collect();
        let neg_set: HashSet<_> = v.neg.iter().collect();
        pos_set.intersection(&neg_set).count() == 0
    }

    #[test]
    fn test_random_sparse_vec() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
        let sv = random_sparse_vec(&mut rng, 10000, 200);
        let nnz = sv.pos.len() + sv.neg.len();
        assert_eq!(nnz, 200);

        assert!(strictly_increasing(&sv.pos));
        assert!(strictly_increasing(&sv.neg));
        assert!(signs_disjoint(&sv));
    }

    #[test]
    fn test_deterministic_sparse_vec() {
        let vec1 = deterministic_sparse_vec(10000, 200, 42);
        let vec2 = deterministic_sparse_vec(10000, 200, 42);
        assert_eq!(vec1.pos, vec2.pos);
        assert_eq!(vec1.neg, vec2.neg);

        // Same structural guarantees as the random generator.
        assert_eq!(vec1.pos.len(), 100);
        assert_eq!(vec1.neg.len(), 100);
        assert!(strictly_increasing(&vec1.pos));
        assert!(strictly_increasing(&vec1.neg));
        assert!(signs_disjoint(&vec1));

        let vec3 = deterministic_sparse_vec(10000, 200, 43);
        assert_ne!(vec1.pos, vec3.pos);
    }

    #[test]
    fn test_sparse_dot() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
        let a = random_sparse_vec(&mut rng, 10000, 200);
        let b = random_sparse_vec(&mut rng, 10000, 200);

        // The dot product is symmetric.
        let dot = sparse_dot(&a, &b);
        let dot_rev = sparse_dot(&b, &a);
        assert_eq!(dot, dot_rev);

        // A vector dotted with itself counts every non-zero entry exactly once.
        assert_eq!(sparse_dot(&a, &a), 200);
        assert_eq!(sparse_dot(&b, &b), 200);
    }

    #[test]
    fn test_sparse_dot_known_values() {
        let a = SparseVec {
            pos: vec![1, 3],
            neg: vec![5],
        };
        let b = SparseVec {
            pos: vec![3, 5],
            neg: vec![1],
        };
        // Index 3 agrees (+1); indices 1 and 5 have opposite signs (-1 each).
        assert_eq!(sparse_dot(&a, &b), -1);
        assert_eq!(sparse_dot(&b, &a), -1);
    }

    #[test]
    fn test_generate_noise_pattern() {
        let data1 = generate_noise_pattern(1000, 42);
        let data2 = generate_noise_pattern(1000, 42);
        assert_eq!(data1.len(), 1000);
        assert_eq!(data1, data2);

        let data3 = generate_noise_pattern(1000, 43);
        assert_ne!(data1, data3);
    }

    #[test]
    fn test_generate_gradient_pattern() {
        assert_eq!(generate_gradient_pattern(2, 2), vec![0u8, 63, 63, 127]);
        assert_eq!(generate_gradient_pattern(16, 8).len(), 128);
        assert!(generate_gradient_pattern(0, 0).is_empty());
        assert!(generate_gradient_pattern(0, 4).is_empty());
        assert!(generate_gradient_pattern(4, 0).is_empty());
    }

    #[test]
    fn test_generate_binary_blob() {
        // Too small for the header: only the first band's fill byte.
        assert_eq!(generate_binary_blob(8), vec![0x90u8; 8]);

        let blob = generate_binary_blob(1030);
        assert_eq!(blob.len(), 1030);
        assert_eq!(&blob[..4], &[0x7f, b'E', b'L', b'F']);
        assert_eq!(blob[16], 0x90);
        assert_eq!(blob[300], 44);
        assert_eq!(blob[512], 0x00);
        assert_eq!(blob[768], 0xCC);
        assert_eq!(blob[1024], 0x90);
    }
}