Skip to main content

converge_analytics/packs/segmentation/
mod.rs

1mod solver;
2mod types;
3
4pub use solver::*;
5pub use types::*;
6
7use converge_optimization::packs::{
8    InvariantDef, InvariantResult, Pack, PackSolveResult, default_gate_evaluation,
9};
10use converge_pack::gate::GateResult as Result;
11use converge_pack::gate::{KernelTraceLink, ProblemSpec, PromotionGate, ProposedPlan};
12
/// Pack that segments input records into clusters.
///
/// The type carries no state; all behaviour lives in its [`Pack`] implementation,
/// which solves with `KMeansSolver` and checks the `non-empty-clusters` and
/// `balanced-clusters` invariants on the resulting plan.
#[derive(Debug, Clone, Copy, Default)]
pub struct SegmentationPack;
14
15impl Pack for SegmentationPack {
16    fn name(&self) -> &'static str {
17        "segmentation"
18    }
19
20    fn version(&self) -> &'static str {
21        "1.0.0"
22    }
23
24    fn validate_inputs(&self, inputs: &serde_json::Value) -> Result<()> {
25        let input: SegmentationInput = serde_json::from_value(inputs.clone())
26            .map_err(|e| converge_pack::GateError::invalid_input(format!("Invalid input: {e}")))?;
27        input.validate()
28    }
29
30    fn invariants(&self) -> &[InvariantDef] {
31        static INVARIANTS: std::sync::LazyLock<Vec<InvariantDef>> =
32            std::sync::LazyLock::new(|| {
33                vec![
34                    InvariantDef::critical(
35                        "non-empty-clusters",
36                        "Every cluster must have at least one member",
37                    ),
38                    InvariantDef::advisory(
39                        "balanced-clusters",
40                        "Cluster size below 10% of expected proportion",
41                    ),
42                ]
43            });
44        &INVARIANTS
45    }
46
47    fn solve(&self, spec: &ProblemSpec) -> Result<PackSolveResult> {
48        let input: SegmentationInput = spec.inputs_as()?;
49        input.validate()?;
50
51        let solver = KMeansSolver;
52        let (output, report) = solver.solve(&input, spec)?;
53
54        let trace = KernelTraceLink::audit_only(format!("trace-{}", spec.problem_id));
55
56        // Confidence from cluster quality: lower inertia relative to spread = higher confidence
57        let global_mean: Vec<f64> = {
58            let dim = input.records[0].len();
59            let n = input.records.len() as f64;
60            let mut mean = vec![0.0; dim];
61            for record in &input.records {
62                for (j, &v) in record.iter().enumerate() {
63                    mean[j] += v;
64                }
65            }
66            for v in &mut mean {
67                *v /= n;
68            }
69            mean
70        };
71        let total_variance: f64 = input
72            .records
73            .iter()
74            .map(|r| {
75                r.iter()
76                    .zip(&global_mean)
77                    .map(|(a, b)| (a - b).powi(2))
78                    .sum::<f64>()
79            })
80            .sum();
81        let confidence = if total_variance > 0.0 {
82            (1.0 - output.inertia / total_variance).clamp(0.3, 0.95)
83        } else {
84            0.5
85        };
86
87        let plan = ProposedPlan::from_payload(
88            format!("plan-{}", spec.problem_id),
89            self.name(),
90            output.summary(),
91            &output,
92            confidence,
93            trace,
94        )?;
95
96        Ok(PackSolveResult::new(plan, report))
97    }
98
99    fn check_invariants(&self, plan: &ProposedPlan) -> Result<Vec<InvariantResult>> {
100        let output: SegmentationOutput = serde_json::from_value(plan.plan.clone())
101            .map_err(|e| converge_pack::GateError::invalid_input(e.to_string()))?;
102
103        let k = output.centroids.len();
104        let n = output.assignments.len();
105        let mut counts = vec![0usize; k];
106        for &a in &output.assignments {
107            if a < k {
108                counts[a] += 1;
109            }
110        }
111
112        let mut results = vec![];
113
114        let empty_clusters: Vec<usize> = counts
115            .iter()
116            .enumerate()
117            .filter(|(_, c)| **c == 0)
118            .map(|(i, _)| i)
119            .collect();
120
121        if empty_clusters.is_empty() {
122            results.push(InvariantResult::pass("non-empty-clusters"));
123        } else {
124            results.push(InvariantResult::fail(
125                "non-empty-clusters",
126                converge_pack::gate::Violation::new(
127                    "non-empty-clusters",
128                    empty_clusters.len() as f64,
129                    format!("Empty clusters: {:?}", empty_clusters),
130                ),
131            ));
132        }
133
134        let expected_size = n as f64 / k as f64;
135        let threshold = expected_size * 0.1;
136        let undersized: Vec<usize> = counts
137            .iter()
138            .enumerate()
139            .filter(|(_, c)| (**c as f64) < threshold)
140            .map(|(i, _)| i)
141            .collect();
142
143        if undersized.is_empty() {
144            results.push(InvariantResult::pass("balanced-clusters"));
145        } else {
146            results.push(InvariantResult::fail(
147                "balanced-clusters",
148                converge_pack::gate::Violation::new(
149                    "balanced-clusters",
150                    undersized.len() as f64,
151                    format!("Undersized clusters: {:?}", undersized),
152                ),
153            ));
154        }
155
156        Ok(results)
157    }
158
159    fn evaluate_gate(
160        &self,
161        _plan: &ProposedPlan,
162        invariant_results: &[InvariantResult],
163    ) -> PromotionGate {
164        default_gate_evaluation(invariant_results, self.invariants())
165    }
166}