/// Describes how values along one variation axis are drawn.
///
/// The sampler in this file dispatches on `kind`: `"gaussian"` and
/// `"bimodal"` use `mean` and `std`, while any other value falls back to
/// uniform sampling over the owning axis range.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct Distribution {
    /// Sampler selector, e.g. `"gaussian"`, `"bimodal"` or `"uniform"`.
    pub kind: String,
    /// Centre of the distribution (read by the gaussian and bimodal samplers).
    pub mean: f32,
    /// Spread of the distribution (read by the gaussian and bimodal samplers).
    pub std: f32,
}
15
16#[allow(dead_code)]
17#[derive(Debug, Clone)]
19pub struct VariationAxis {
20 pub name: String,
21 pub min: f32,
22 pub max: f32,
23 pub distribution: Distribution,
24}
25
26#[allow(dead_code)]
27#[derive(Debug, Clone)]
29pub struct CrowdSpec {
30 pub n: usize,
31 pub axes: Vec<VariationAxis>,
32 pub seed: u64,
33}
34
/// A single generated individual.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct CrowdMember {
    /// Index of the member within its crowd.
    pub id: usize,
    /// `(axis name, sampled value)` pairs, one per axis, in axis order.
    pub params: Vec<(String, f32)>,
    /// Group label; crowd generation in this file initialises it to 0.
    pub group_id: usize,
}
43
44#[allow(dead_code)]
45#[derive(Debug, Clone)]
47pub struct Crowd {
48 pub members: Vec<CrowdMember>,
49 pub spec: CrowdSpec,
50}
51
/// Advances the linear congruential generator by one step.
///
/// Computes `state * MULTIPLIER + INCREMENT` with wrapping 64-bit
/// arithmetic and stores the result back into `state`.
#[inline]
fn lcg_step(state: &mut u64) {
    const MULTIPLIER: u64 = 1_664_525;
    const INCREMENT: u64 = 1_013_904_223;

    let scaled = (*state).wrapping_mul(MULTIPLIER);
    *state = scaled.wrapping_add(INCREMENT);
}
61
62pub fn lcg_sample(state: &mut u64, min: f32, max: f32) -> f32 {
64 lcg_step(state);
65 let t = (*state as f32) / (u64::MAX as f32);
66 min + t * (max - min)
67}
68
69pub fn lcg_gaussian(state: &mut u64, mean: f32, std: f32) -> f32 {
71 let u1 = lcg_sample(state, 1e-9, 1.0);
72 let u2 = lcg_sample(state, 0.0, 1.0);
73 let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f32::consts::PI * u2).cos();
74 mean + std * z
75}
76
77fn sample_axis(state: &mut u64, axis: &VariationAxis) -> f32 {
79 let v = match axis.distribution.kind.as_str() {
80 "gaussian" => lcg_gaussian(state, axis.distribution.mean, axis.distribution.std),
81 "bimodal" => {
82 let which = lcg_sample(state, 0.0, 1.0);
84 let offset = if which < 0.5 {
85 -axis.distribution.std
86 } else {
87 axis.distribution.std
88 };
89 lcg_gaussian(
90 state,
91 axis.distribution.mean + offset,
92 axis.distribution.std * 0.5,
93 )
94 }
95 _ => lcg_sample(state, axis.min, axis.max), };
97 v.clamp(axis.min, axis.max)
98}
99
100pub fn generate_crowd(spec: &CrowdSpec) -> Crowd {
106 let mut state = spec.seed ^ 0xDEAD_BEEF_CAFE_BABE;
107 let members: Vec<CrowdMember> = (0..spec.n)
108 .map(|id| {
109 let params: Vec<(String, f32)> = spec
110 .axes
111 .iter()
112 .map(|ax| (ax.name.clone(), sample_axis(&mut state, ax)))
113 .collect();
114 CrowdMember {
115 id,
116 params,
117 group_id: 0,
118 }
119 })
120 .collect();
121 Crowd {
122 members,
123 spec: spec.clone(),
124 }
125}
126
127pub fn crowd_diversity_score(crowd: &Crowd) -> f32 {
129 let n = crowd.members.len();
130 if n < 2 {
131 return 0.0;
132 }
133 let mut total = 0.0f32;
134 let mut count = 0usize;
135 for i in 0..n {
136 for j in (i + 1)..n {
137 let a = &crowd.members[i].params;
138 let b = &crowd.members[j].params;
139 let dist: f32 = a
140 .iter()
141 .zip(b.iter())
142 .map(|(x, y)| (x.1 - y.1).powi(2))
143 .sum::<f32>()
144 .sqrt();
145 total += dist;
146 count += 1;
147 }
148 }
149 if count == 0 {
150 0.0
151 } else {
152 total / count as f32
153 }
154}
155
156pub fn cluster_crowd(crowd: &Crowd, k: usize) -> Vec<usize> {
158 let n = crowd.members.len();
159 if n == 0 || k == 0 {
160 return vec![];
161 }
162 let k = k.min(n);
163 let dim = crowd.members[0].params.len();
164
165 let mut centroids: Vec<Vec<f32>> = (0..k)
167 .map(|i| crowd.members[i].params.iter().map(|(_, v)| *v).collect())
168 .collect();
169
170 let mut assignments = vec![0usize; n];
171
172 for _ in 0..100 {
173 let mut changed = false;
175 for (i, m) in crowd.members.iter().enumerate() {
176 let vals: Vec<f32> = m.params.iter().map(|(_, v)| *v).collect();
177 let best = (0..k)
178 .min_by(|&a, &b| {
179 let da: f32 = vals
180 .iter()
181 .zip(¢roids[a])
182 .map(|(x, c)| (x - c).powi(2))
183 .sum();
184 let db: f32 = vals
185 .iter()
186 .zip(¢roids[b])
187 .map(|(x, c)| (x - c).powi(2))
188 .sum();
189 da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
190 })
191 .unwrap_or(0);
192 if assignments[i] != best {
193 assignments[i] = best;
194 changed = true;
195 }
196 }
197 if !changed {
198 break;
199 }
200 let mut sums = vec![vec![0.0f32; dim]; k];
202 let mut counts = vec![0usize; k];
203 for (i, m) in crowd.members.iter().enumerate() {
204 let c = assignments[i];
205 for (d, (_, v)) in m.params.iter().enumerate() {
206 sums[c][d] += v;
207 }
208 counts[c] += 1;
209 }
210 for c in 0..k {
211 if counts[c] > 0 {
212 for d in 0..dim {
213 centroids[c][d] = sums[c][d] / counts[c] as f32;
214 }
215 }
216 }
217 }
218 assignments
219}
220
221pub fn crowd_to_json(crowd: &Crowd) -> String {
223 let mut out = String::from("[");
224 for (i, m) in crowd.members.iter().enumerate() {
225 if i > 0 {
226 out.push(',');
227 }
228 out.push_str(&format!(
229 r#"{{"id":{},"group_id":{},"params":{{"#,
230 m.id, m.group_id
231 ));
232 for (j, (k, v)) in m.params.iter().enumerate() {
233 if j > 0 {
234 out.push(',');
235 }
236 out.push_str(&format!(r#""{}":{}."#, k, *v as i32));
237 out = out.trim_end_matches('.').to_string();
239 out.push_str(&format!("{:.4}", v));
240 }
242 out.push_str("}}");
243 }
244 out.push(']');
245
246 let mut result = String::from("[");
248 for (i, m) in crowd.members.iter().enumerate() {
249 if i > 0 {
250 result.push(',');
251 }
252 result.push_str(&format!(
253 r#"{{"id":{},"group_id":{},"params":{{"#,
254 m.id, m.group_id
255 ));
256 for (j, (name, val)) in m.params.iter().enumerate() {
257 if j > 0 {
258 result.push(',');
259 }
260 result.push_str(&format!(r#""{}":{:.4}"#, name, val));
261 }
262 result.push_str("}}");
263 }
264 result.push(']');
265 result
266}
267
268pub fn standard_crowd_axes() -> Vec<VariationAxis> {
270 vec![
271 VariationAxis {
272 name: "height".into(),
273 min: 1.50,
274 max: 2.05,
275 distribution: Distribution {
276 kind: "gaussian".into(),
277 mean: 1.75,
278 std: 0.08,
279 },
280 },
281 VariationAxis {
282 name: "weight".into(),
283 min: 45.0,
284 max: 130.0,
285 distribution: Distribution {
286 kind: "gaussian".into(),
287 mean: 75.0,
288 std: 15.0,
289 },
290 },
291 VariationAxis {
292 name: "age".into(),
293 min: 18.0,
294 max: 80.0,
295 distribution: Distribution {
296 kind: "uniform".into(),
297 mean: 49.0,
298 std: 18.0,
299 },
300 },
301 VariationAxis {
302 name: "muscle".into(),
303 min: 0.0,
304 max: 1.0,
305 distribution: Distribution {
306 kind: "gaussian".into(),
307 mean: 0.4,
308 std: 0.2,
309 },
310 },
311 VariationAxis {
312 name: "fat".into(),
313 min: 0.0,
314 max: 1.0,
315 distribution: Distribution {
316 kind: "gaussian".into(),
317 mean: 0.35,
318 std: 0.2,
319 },
320 },
321 VariationAxis {
322 name: "skin_tone".into(),
323 min: 0.0,
324 max: 1.0,
325 distribution: Distribution {
326 kind: "uniform".into(),
327 mean: 0.5,
328 std: 0.3,
329 },
330 },
331 VariationAxis {
332 name: "face_width".into(),
333 min: 0.8,
334 max: 1.2,
335 distribution: Distribution {
336 kind: "gaussian".into(),
337 mean: 1.0,
338 std: 0.08,
339 },
340 },
341 VariationAxis {
342 name: "leg_length".into(),
343 min: 0.8,
344 max: 1.2,
345 distribution: Distribution {
346 kind: "gaussian".into(),
347 mean: 1.0,
348 std: 0.07,
349 },
350 },
351 ]
352}
353
354pub fn diversity_histogram(crowd: &Crowd, axis_idx: usize, bins: usize) -> Vec<u32> {
356 if bins == 0 || crowd.members.is_empty() {
357 return vec![];
358 }
359 let vals: Vec<f32> = crowd
360 .members
361 .iter()
362 .filter_map(|m| m.params.get(axis_idx).map(|(_, v)| *v))
363 .collect();
364 if vals.is_empty() {
365 return vec![0; bins];
366 }
367 let min_v = vals.iter().cloned().fold(f32::INFINITY, f32::min);
368 let max_v = vals.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
369 let range = (max_v - min_v).max(1e-9);
370 let mut hist = vec![0u32; bins];
371 for v in &vals {
372 let idx = (((v - min_v) / range) * bins as f32) as usize;
373 let idx = idx.min(bins - 1);
374 hist[idx] += 1;
375 }
376 hist
377}
378
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a spec over the standard axes with a fixed seed of 42.
    fn simple_spec(n: usize) -> CrowdSpec {
        let axes = standard_crowd_axes();
        CrowdSpec { n, axes, seed: 42 }
    }

    #[test]
    fn test_crowd_size_matches_spec() {
        let crowd = generate_crowd(&simple_spec(50));
        assert_eq!(crowd.members.len(), 50);
    }

    #[test]
    fn test_crowd_zero_size() {
        let crowd = generate_crowd(&simple_spec(0));
        assert!(crowd.members.is_empty());
    }

    #[test]
    fn test_params_count_matches_axes() {
        let spec = simple_spec(10);
        let crowd = generate_crowd(&spec);
        let expected = spec.axes.len();
        for member in &crowd.members {
            assert_eq!(member.params.len(), expected);
        }
    }

    #[test]
    fn test_params_in_range() {
        let spec = simple_spec(100);
        let crowd = generate_crowd(&spec);
        for member in &crowd.members {
            for (idx, (_, value)) in member.params.iter().enumerate() {
                let axis = &spec.axes[idx];
                let in_range = (axis.min..=axis.max).contains(value);
                assert!(in_range, "axis {} out of range: {}", axis.name, value);
            }
        }
    }

    #[test]
    fn test_determinism() {
        let spec = simple_spec(20);
        let first = generate_crowd(&spec);
        let second = generate_crowd(&spec);
        for (a, b) in first.members.iter().zip(second.members.iter()) {
            for ((_, va), (_, vb)) in a.params.iter().zip(b.params.iter()) {
                assert!((va - vb).abs() < 1e-9);
            }
        }
    }

    #[test]
    fn test_different_seeds_differ() {
        // Same size and axes, only the seed varies.
        let crowd_for_seed = |seed: u64| {
            generate_crowd(&CrowdSpec {
                n: 20,
                axes: standard_crowd_axes(),
                seed,
            })
        };
        let c1 = crowd_for_seed(1);
        let c2 = crowd_for_seed(2);

        let mut differs = false;
        'outer: for (a, b) in c1.members.iter().zip(c2.members.iter()) {
            for ((_, va), (_, vb)) in a.params.iter().zip(b.params.iter()) {
                if (va - vb).abs() > 1e-6 {
                    differs = true;
                    break 'outer;
                }
            }
        }
        assert!(differs);
    }

    #[test]
    fn test_diversity_positive_for_varied_crowd() {
        let crowd = generate_crowd(&simple_spec(30));
        let score = crowd_diversity_score(&crowd);
        assert!(score > 0.0);
    }

    #[test]
    fn test_diversity_zero_for_single_member() {
        let crowd = generate_crowd(&simple_spec(1));
        assert_eq!(crowd_diversity_score(&crowd), 0.0);
    }

    #[test]
    fn test_cluster_k_groups() {
        let crowd = generate_crowd(&simple_spec(40));
        let assignments = cluster_crowd(&crowd, 4);
        assert_eq!(assignments.len(), 40);
        // Every label must be a valid cluster index.
        let max_group = assignments.iter().copied().max().unwrap_or(0);
        assert!(max_group < 4);
    }

    #[test]
    fn test_cluster_single_group() {
        let crowd = generate_crowd(&simple_spec(10));
        let assignments = cluster_crowd(&crowd, 1);
        for group in assignments {
            assert!(group == 0);
        }
    }

    #[test]
    fn test_cluster_empty_crowd() {
        let crowd = generate_crowd(&simple_spec(0));
        assert!(cluster_crowd(&crowd, 3).is_empty());
    }

    #[test]
    fn test_crowd_to_json_non_empty() {
        let crowd = generate_crowd(&simple_spec(5));
        let json = crowd_to_json(&crowd);
        assert!(json.starts_with('['));
        assert!(json.ends_with(']'));
        assert!(json.contains("height"));
    }

    #[test]
    fn test_histogram_sum_equals_n() {
        let crowd = generate_crowd(&simple_spec(50));
        let hist = diversity_histogram(&crowd, 0, 10);
        let total: u32 = hist.iter().sum();
        assert_eq!(total, 50);
    }

    #[test]
    fn test_histogram_bins_count() {
        let crowd = generate_crowd(&simple_spec(20));
        let hist = diversity_histogram(&crowd, 1, 5);
        assert_eq!(hist.len(), 5);
    }

    #[test]
    fn test_standard_axes_count() {
        assert_eq!(standard_crowd_axes().len(), 8);
    }

    #[test]
    fn test_lcg_sample_range() {
        let mut state = 12345u64;
        for _ in 0..1000 {
            let v = lcg_sample(&mut state, 0.0, 1.0);
            assert!((0.0..=1.0).contains(&v));
        }
    }

    #[test]
    fn test_gaussian_distribution_kind() {
        let axes = standard_crowd_axes();
        let height_ax = axes
            .iter()
            .find(|axis| axis.name == "height")
            .expect("should succeed");
        assert_eq!(height_ax.distribution.kind, "gaussian");
    }
}