Skip to main content

oxihuman_morph/
crowd_variation.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Crowd generation with controlled variation using deterministic LCG.
5
#[allow(dead_code)]
/// Distribution shape for a variation axis.
///
/// The sampler in this file matches `kind` as a plain string: `"gaussian"` and `"bimodal"`
/// get dedicated handling, and any other value falls through to uniform sampling.
#[derive(Debug, Clone)]
pub struct Distribution {
    /// "uniform", "gaussian", or "bimodal"
    pub kind: String,
    /// Centre of the distribution. Read for "gaussian" and "bimodal"; the uniform path
    /// samples between the axis bounds and does not look at this field.
    pub mean: f32,
    /// Spread of the distribution. For "gaussian" this is the standard deviation; for
    /// "bimodal" the two modes sit at `mean ± std` and each mode uses `std * 0.5`.
    pub std: f32,
}
15
#[allow(dead_code)]
/// A single variation dimension (e.g. height, weight).
#[derive(Debug, Clone)]
pub struct VariationAxis {
    /// Label copied into each generated member's `params` entry for this axis.
    pub name: String,
    /// Lower bound: every sampled value is clamped to be at least this.
    pub min: f32,
    /// Upper bound: every sampled value is clamped to be at most this.
    pub max: f32,
    /// How values are drawn before the clamp to `[min, max]` is applied.
    pub distribution: Distribution,
}
25
#[allow(dead_code)]
/// Specification for generating a crowd.
#[derive(Debug, Clone)]
pub struct CrowdSpec {
    /// Number of members to generate.
    pub n: usize,
    /// Axes sampled for every member, in this order.
    pub axes: Vec<VariationAxis>,
    /// Seed for the LCG; `generate_crowd` XORs it with a fixed constant to form the
    /// initial generator state, so equal seeds give equal crowds.
    pub seed: u64,
}
34
#[allow(dead_code)]
/// One person in a crowd.
#[derive(Debug, Clone)]
pub struct CrowdMember {
    /// Position of the member in generation order (`0..n` when built by `generate_crowd`).
    pub id: usize,
    /// `(axis name, sampled value)` pairs, one per axis, in the spec's axis order.
    pub params: Vec<(String, f32)>,
    /// Group label; `generate_crowd` always sets this to 0.
    pub group_id: usize,
}
43
#[allow(dead_code)]
/// A generated crowd.
#[derive(Debug, Clone)]
pub struct Crowd {
    /// The generated people, in generation order.
    pub members: Vec<CrowdMember>,
    /// Copy of the specification the crowd was generated from.
    pub spec: CrowdSpec,
}
51
52// ---------------------------------------------------------------------------
53// LCG helpers
54// ---------------------------------------------------------------------------
55
/// Advances the generator state by one linear-congruential step.
///
/// The update is `state * 1_664_525 + 1_013_904_223` using wrapping `u64` arithmetic; the
/// multiplier/increment pair is the one from Numerical Recipes.
#[inline]
fn lcg_step(state: &mut u64) {
    const MULTIPLIER: u64 = 1_664_525;
    const INCREMENT: u64 = 1_013_904_223;
    let scaled = (*state).wrapping_mul(MULTIPLIER);
    *state = scaled.wrapping_add(INCREMENT);
}
61
62/// Sample a uniform float in [min, max) using the LCG state.
63pub fn lcg_sample(state: &mut u64, min: f32, max: f32) -> f32 {
64    lcg_step(state);
65    let t = (*state as f32) / (u64::MAX as f32);
66    min + t * (max - min)
67}
68
69/// Box-Muller transform for Gaussian sampling, clamped to [mean-3σ, mean+3σ].
70pub fn lcg_gaussian(state: &mut u64, mean: f32, std: f32) -> f32 {
71    let u1 = lcg_sample(state, 1e-9, 1.0);
72    let u2 = lcg_sample(state, 0.0, 1.0);
73    let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f32::consts::PI * u2).cos();
74    mean + std * z
75}
76
77/// Sample from the axis distribution, clamped to [min, max].
78fn sample_axis(state: &mut u64, axis: &VariationAxis) -> f32 {
79    let v = match axis.distribution.kind.as_str() {
80        "gaussian" => lcg_gaussian(state, axis.distribution.mean, axis.distribution.std),
81        "bimodal" => {
82            // two Gaussians at ±σ from mean
83            let which = lcg_sample(state, 0.0, 1.0);
84            let offset = if which < 0.5 {
85                -axis.distribution.std
86            } else {
87                axis.distribution.std
88            };
89            lcg_gaussian(
90                state,
91                axis.distribution.mean + offset,
92                axis.distribution.std * 0.5,
93            )
94        }
95        _ => lcg_sample(state, axis.min, axis.max), // "uniform"
96    };
97    v.clamp(axis.min, axis.max)
98}
99
100// ---------------------------------------------------------------------------
101// Public API
102// ---------------------------------------------------------------------------
103
104/// Generate a crowd from a spec.
105pub fn generate_crowd(spec: &CrowdSpec) -> Crowd {
106    let mut state = spec.seed ^ 0xDEAD_BEEF_CAFE_BABE;
107    let members: Vec<CrowdMember> = (0..spec.n)
108        .map(|id| {
109            let params: Vec<(String, f32)> = spec
110                .axes
111                .iter()
112                .map(|ax| (ax.name.clone(), sample_axis(&mut state, ax)))
113                .collect();
114            CrowdMember {
115                id,
116                params,
117                group_id: 0,
118            }
119        })
120        .collect();
121    Crowd {
122        members,
123        spec: spec.clone(),
124    }
125}
126
127/// Average pairwise Euclidean distance across all param axes.
128pub fn crowd_diversity_score(crowd: &Crowd) -> f32 {
129    let n = crowd.members.len();
130    if n < 2 {
131        return 0.0;
132    }
133    let mut total = 0.0f32;
134    let mut count = 0usize;
135    for i in 0..n {
136        for j in (i + 1)..n {
137            let a = &crowd.members[i].params;
138            let b = &crowd.members[j].params;
139            let dist: f32 = a
140                .iter()
141                .zip(b.iter())
142                .map(|(x, y)| (x.1 - y.1).powi(2))
143                .sum::<f32>()
144                .sqrt();
145            total += dist;
146            count += 1;
147        }
148    }
149    if count == 0 {
150        0.0
151    } else {
152        total / count as f32
153    }
154}
155
156/// K-means cluster assignment (Euclidean, max 100 iterations).
157pub fn cluster_crowd(crowd: &Crowd, k: usize) -> Vec<usize> {
158    let n = crowd.members.len();
159    if n == 0 || k == 0 {
160        return vec![];
161    }
162    let k = k.min(n);
163    let dim = crowd.members[0].params.len();
164
165    // Initialise centroids from first k members
166    let mut centroids: Vec<Vec<f32>> = (0..k)
167        .map(|i| crowd.members[i].params.iter().map(|(_, v)| *v).collect())
168        .collect();
169
170    let mut assignments = vec![0usize; n];
171
172    for _ in 0..100 {
173        // Assign
174        let mut changed = false;
175        for (i, m) in crowd.members.iter().enumerate() {
176            let vals: Vec<f32> = m.params.iter().map(|(_, v)| *v).collect();
177            let best = (0..k)
178                .min_by(|&a, &b| {
179                    let da: f32 = vals
180                        .iter()
181                        .zip(&centroids[a])
182                        .map(|(x, c)| (x - c).powi(2))
183                        .sum();
184                    let db: f32 = vals
185                        .iter()
186                        .zip(&centroids[b])
187                        .map(|(x, c)| (x - c).powi(2))
188                        .sum();
189                    da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
190                })
191                .unwrap_or(0);
192            if assignments[i] != best {
193                assignments[i] = best;
194                changed = true;
195            }
196        }
197        if !changed {
198            break;
199        }
200        // Update centroids
201        let mut sums = vec![vec![0.0f32; dim]; k];
202        let mut counts = vec![0usize; k];
203        for (i, m) in crowd.members.iter().enumerate() {
204            let c = assignments[i];
205            for (d, (_, v)) in m.params.iter().enumerate() {
206                sums[c][d] += v;
207            }
208            counts[c] += 1;
209        }
210        for c in 0..k {
211            if counts[c] > 0 {
212                for d in 0..dim {
213                    centroids[c][d] = sums[c][d] / counts[c] as f32;
214                }
215            }
216        }
217    }
218    assignments
219}
220
221/// Minimal JSON serialization (no external deps).
222pub fn crowd_to_json(crowd: &Crowd) -> String {
223    let mut out = String::from("[");
224    for (i, m) in crowd.members.iter().enumerate() {
225        if i > 0 {
226            out.push(',');
227        }
228        out.push_str(&format!(
229            r#"{{"id":{},"group_id":{},"params":{{"#,
230            m.id, m.group_id
231        ));
232        for (j, (k, v)) in m.params.iter().enumerate() {
233            if j > 0 {
234                out.push(',');
235            }
236            out.push_str(&format!(r#""{}":{}."#, k, *v as i32));
237            // use serde_json-free approach: just format as fixed decimal
238            out = out.trim_end_matches('.').to_string();
239            out.push_str(&format!("{:.4}", v));
240            // remove the integer part we just pushed — redo cleanly
241        }
242        out.push_str("}}");
243    }
244    out.push(']');
245
246    // Redo with a cleaner builder
247    let mut result = String::from("[");
248    for (i, m) in crowd.members.iter().enumerate() {
249        if i > 0 {
250            result.push(',');
251        }
252        result.push_str(&format!(
253            r#"{{"id":{},"group_id":{},"params":{{"#,
254            m.id, m.group_id
255        ));
256        for (j, (name, val)) in m.params.iter().enumerate() {
257            if j > 0 {
258                result.push(',');
259            }
260            result.push_str(&format!(r#""{}":{:.4}"#, name, val));
261        }
262        result.push_str("}}");
263    }
264    result.push(']');
265    result
266}
267
268/// Eight standard variation axes (height, weight, age, muscle, fat, skin_tone, face_width, leg_length).
269pub fn standard_crowd_axes() -> Vec<VariationAxis> {
270    vec![
271        VariationAxis {
272            name: "height".into(),
273            min: 1.50,
274            max: 2.05,
275            distribution: Distribution {
276                kind: "gaussian".into(),
277                mean: 1.75,
278                std: 0.08,
279            },
280        },
281        VariationAxis {
282            name: "weight".into(),
283            min: 45.0,
284            max: 130.0,
285            distribution: Distribution {
286                kind: "gaussian".into(),
287                mean: 75.0,
288                std: 15.0,
289            },
290        },
291        VariationAxis {
292            name: "age".into(),
293            min: 18.0,
294            max: 80.0,
295            distribution: Distribution {
296                kind: "uniform".into(),
297                mean: 49.0,
298                std: 18.0,
299            },
300        },
301        VariationAxis {
302            name: "muscle".into(),
303            min: 0.0,
304            max: 1.0,
305            distribution: Distribution {
306                kind: "gaussian".into(),
307                mean: 0.4,
308                std: 0.2,
309            },
310        },
311        VariationAxis {
312            name: "fat".into(),
313            min: 0.0,
314            max: 1.0,
315            distribution: Distribution {
316                kind: "gaussian".into(),
317                mean: 0.35,
318                std: 0.2,
319            },
320        },
321        VariationAxis {
322            name: "skin_tone".into(),
323            min: 0.0,
324            max: 1.0,
325            distribution: Distribution {
326                kind: "uniform".into(),
327                mean: 0.5,
328                std: 0.3,
329            },
330        },
331        VariationAxis {
332            name: "face_width".into(),
333            min: 0.8,
334            max: 1.2,
335            distribution: Distribution {
336                kind: "gaussian".into(),
337                mean: 1.0,
338                std: 0.08,
339            },
340        },
341        VariationAxis {
342            name: "leg_length".into(),
343            min: 0.8,
344            max: 1.2,
345            distribution: Distribution {
346                kind: "gaussian".into(),
347                mean: 1.0,
348                std: 0.07,
349            },
350        },
351    ]
352}
353
354/// Histogram of values for a given axis index across all members.
355pub fn diversity_histogram(crowd: &Crowd, axis_idx: usize, bins: usize) -> Vec<u32> {
356    if bins == 0 || crowd.members.is_empty() {
357        return vec![];
358    }
359    let vals: Vec<f32> = crowd
360        .members
361        .iter()
362        .filter_map(|m| m.params.get(axis_idx).map(|(_, v)| *v))
363        .collect();
364    if vals.is_empty() {
365        return vec![0; bins];
366    }
367    let min_v = vals.iter().cloned().fold(f32::INFINITY, f32::min);
368    let max_v = vals.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
369    let range = (max_v - min_v).max(1e-9);
370    let mut hist = vec![0u32; bins];
371    for v in &vals {
372        let idx = (((v - min_v) / range) * bins as f32) as usize;
373        let idx = idx.min(bins - 1);
374        hist[idx] += 1;
375    }
376    hist
377}
378
379// ---------------------------------------------------------------------------
380// Tests
381// ---------------------------------------------------------------------------
382
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixture: the eight standard axes with a fixed seed, so every test below is
    // reproducible.
    fn simple_spec(n: usize) -> CrowdSpec {
        CrowdSpec {
            n,
            axes: standard_crowd_axes(),
            seed: 42,
        }
    }

    // The generated crowd has exactly as many members as the spec asks for.
    #[test]
    fn test_crowd_size_matches_spec() {
        let spec = simple_spec(50);
        let crowd = generate_crowd(&spec);
        assert_eq!(crowd.members.len(), 50);
    }

    // Asking for zero members yields an empty crowd.
    #[test]
    fn test_crowd_zero_size() {
        let spec = simple_spec(0);
        let crowd = generate_crowd(&spec);
        assert!(crowd.members.is_empty());
    }

    // Every member carries one parameter per axis.
    #[test]
    fn test_params_count_matches_axes() {
        let spec = simple_spec(10);
        let crowd = generate_crowd(&spec);
        for m in &crowd.members {
            assert_eq!(m.params.len(), spec.axes.len());
        }
    }

    // Sampled values respect the inclusive [min, max] bounds of their axis.
    #[test]
    fn test_params_in_range() {
        let spec = simple_spec(100);
        let crowd = generate_crowd(&spec);
        for m in &crowd.members {
            for (j, (_, v)) in m.params.iter().enumerate() {
                let ax = &spec.axes[j];
                assert!(
                    (ax.min..=ax.max).contains(v),
                    "axis {} out of range: {}",
                    ax.name,
                    v
                );
            }
        }
    }

    // Two runs with the same spec produce the same values. Only values are compared;
    // `zip` stops at the shorter side, so lengths are not checked here.
    #[test]
    fn test_determinism() {
        let spec = simple_spec(20);
        let c1 = generate_crowd(&spec);
        let c2 = generate_crowd(&spec);
        for (a, b) in c1.members.iter().zip(c2.members.iter()) {
            for (pa, pb) in a.params.iter().zip(b.params.iter()) {
                assert!((pa.1 - pb.1).abs() < 1e-9);
            }
        }
    }

    // Different seeds must change at least one sampled value.
    #[test]
    fn test_different_seeds_differ() {
        let spec1 = CrowdSpec {
            n: 20,
            axes: standard_crowd_axes(),
            seed: 1,
        };
        let spec2 = CrowdSpec {
            n: 20,
            axes: standard_crowd_axes(),
            seed: 2,
        };
        let c1 = generate_crowd(&spec1);
        let c2 = generate_crowd(&spec2);
        let differs = c1.members.iter().zip(c2.members.iter()).any(|(a, b)| {
            a.params
                .iter()
                .zip(b.params.iter())
                .any(|(pa, pb)| (pa.1 - pb.1).abs() > 1e-6)
        });
        assert!(differs);
    }

    // A sampled crowd has a strictly positive diversity score.
    #[test]
    fn test_diversity_positive_for_varied_crowd() {
        let spec = simple_spec(30);
        let crowd = generate_crowd(&spec);
        assert!(crowd_diversity_score(&crowd) > 0.0);
    }

    // With a single member there are no pairs, so the score is exactly zero.
    #[test]
    fn test_diversity_zero_for_single_member() {
        let spec = simple_spec(1);
        let crowd = generate_crowd(&spec);
        assert_eq!(crowd_diversity_score(&crowd), 0.0);
    }

    // Clustering returns one label per member and every label is below k.
    #[test]
    fn test_cluster_k_groups() {
        let spec = simple_spec(40);
        let crowd = generate_crowd(&spec);
        let assignments = cluster_crowd(&crowd, 4);
        assert_eq!(assignments.len(), 40);
        let max_group = assignments.iter().cloned().max().unwrap_or(0);
        assert!(max_group < 4);
    }

    // With k = 1 every member ends up in cluster 0.
    #[test]
    fn test_cluster_single_group() {
        let spec = simple_spec(10);
        let crowd = generate_crowd(&spec);
        let assignments = cluster_crowd(&crowd, 1);
        assert!(assignments.iter().all(|&g| g == 0));
    }

    // Clustering an empty crowd returns no labels.
    #[test]
    fn test_cluster_empty_crowd() {
        let spec = simple_spec(0);
        let crowd = generate_crowd(&spec);
        let assignments = cluster_crowd(&crowd, 3);
        assert!(assignments.is_empty());
    }

    // Smoke test of the JSON output: array brackets and a known key are present.
    #[test]
    fn test_crowd_to_json_non_empty() {
        let spec = simple_spec(5);
        let crowd = generate_crowd(&spec);
        let json = crowd_to_json(&crowd);
        assert!(json.starts_with('['));
        assert!(json.ends_with(']'));
        assert!(json.contains("height"));
    }

    // Every member is counted in exactly one histogram bin.
    #[test]
    fn test_histogram_sum_equals_n() {
        let spec = simple_spec(50);
        let crowd = generate_crowd(&spec);
        let hist = diversity_histogram(&crowd, 0, 10);
        let total: u32 = hist.iter().sum();
        assert_eq!(total, 50);
    }

    // The histogram has as many entries as bins were requested.
    #[test]
    fn test_histogram_bins_count() {
        let spec = simple_spec(20);
        let crowd = generate_crowd(&spec);
        let hist = diversity_histogram(&crowd, 1, 5);
        assert_eq!(hist.len(), 5);
    }

    // The standard axis set contains eight axes.
    #[test]
    fn test_standard_axes_count() {
        let axes = standard_crowd_axes();
        assert_eq!(axes.len(), 8);
    }

    // Uniform samples for (0, 1) stay inside the closed interval; the upper bound is
    // accepted as a valid result.
    #[test]
    fn test_lcg_sample_range() {
        let mut state = 12345u64;
        for _ in 0..1000 {
            let v = lcg_sample(&mut state, 0.0, 1.0);
            assert!((0.0..=1.0).contains(&v));
        }
    }

    // The height axis is configured with the "gaussian" distribution kind.
    #[test]
    fn test_gaussian_distribution_kind() {
        let axes = standard_crowd_axes();
        // height uses gaussian
        let height_ax = axes
            .iter()
            .find(|a| a.name == "height")
            .expect("should succeed");
        assert_eq!(height_ax.distribution.kind, "gaussian");
    }
}