converge_analytics/packs/segmentation/
mod.rs1mod solver;
2mod types;
3
4pub use solver::*;
5pub use types::*;
6
7use converge_optimization::packs::{
8 InvariantDef, InvariantResult, Pack, PackSolveResult, default_gate_evaluation,
9};
10use converge_pack::gate::GateResult as Result;
11use converge_pack::gate::{KernelTraceLink, ProblemSpec, PromotionGate, ProposedPlan};
12
13pub struct SegmentationPack;
14
15impl Pack for SegmentationPack {
16 fn name(&self) -> &'static str {
17 "segmentation"
18 }
19
20 fn version(&self) -> &'static str {
21 "1.0.0"
22 }
23
24 fn validate_inputs(&self, inputs: &serde_json::Value) -> Result<()> {
25 let input: SegmentationInput = serde_json::from_value(inputs.clone())
26 .map_err(|e| converge_pack::GateError::invalid_input(format!("Invalid input: {e}")))?;
27 input.validate()
28 }
29
30 fn invariants(&self) -> &[InvariantDef] {
31 static INVARIANTS: std::sync::LazyLock<Vec<InvariantDef>> =
32 std::sync::LazyLock::new(|| {
33 vec![
34 InvariantDef::critical(
35 "non-empty-clusters",
36 "Every cluster must have at least one member",
37 ),
38 InvariantDef::advisory(
39 "balanced-clusters",
40 "Cluster size below 10% of expected proportion",
41 ),
42 ]
43 });
44 &INVARIANTS
45 }
46
47 fn solve(&self, spec: &ProblemSpec) -> Result<PackSolveResult> {
48 let input: SegmentationInput = spec.inputs_as()?;
49 input.validate()?;
50
51 let solver = KMeansSolver;
52 let (output, report) = solver.solve(&input, spec)?;
53
54 let trace = KernelTraceLink::audit_only(format!("trace-{}", spec.problem_id));
55
56 let global_mean: Vec<f64> = {
58 let dim = input.records[0].len();
59 let n = input.records.len() as f64;
60 let mut mean = vec![0.0; dim];
61 for record in &input.records {
62 for (j, &v) in record.iter().enumerate() {
63 mean[j] += v;
64 }
65 }
66 for v in &mut mean {
67 *v /= n;
68 }
69 mean
70 };
71 let total_variance: f64 = input
72 .records
73 .iter()
74 .map(|r| {
75 r.iter()
76 .zip(&global_mean)
77 .map(|(a, b)| (a - b).powi(2))
78 .sum::<f64>()
79 })
80 .sum();
81 let confidence = if total_variance > 0.0 {
82 (1.0 - output.inertia / total_variance).clamp(0.3, 0.95)
83 } else {
84 0.5
85 };
86
87 let plan = ProposedPlan::from_payload(
88 format!("plan-{}", spec.problem_id),
89 self.name(),
90 output.summary(),
91 &output,
92 confidence,
93 trace,
94 )?;
95
96 Ok(PackSolveResult::new(plan, report))
97 }
98
99 fn check_invariants(&self, plan: &ProposedPlan) -> Result<Vec<InvariantResult>> {
100 let output: SegmentationOutput = serde_json::from_value(plan.plan.clone())
101 .map_err(|e| converge_pack::GateError::invalid_input(e.to_string()))?;
102
103 let k = output.centroids.len();
104 let n = output.assignments.len();
105 let mut counts = vec![0usize; k];
106 for &a in &output.assignments {
107 if a < k {
108 counts[a] += 1;
109 }
110 }
111
112 let mut results = vec![];
113
114 let empty_clusters: Vec<usize> = counts
115 .iter()
116 .enumerate()
117 .filter(|(_, c)| **c == 0)
118 .map(|(i, _)| i)
119 .collect();
120
121 if empty_clusters.is_empty() {
122 results.push(InvariantResult::pass("non-empty-clusters"));
123 } else {
124 results.push(InvariantResult::fail(
125 "non-empty-clusters",
126 converge_pack::gate::Violation::new(
127 "non-empty-clusters",
128 empty_clusters.len() as f64,
129 format!("Empty clusters: {:?}", empty_clusters),
130 ),
131 ));
132 }
133
134 let expected_size = n as f64 / k as f64;
135 let threshold = expected_size * 0.1;
136 let undersized: Vec<usize> = counts
137 .iter()
138 .enumerate()
139 .filter(|(_, c)| (**c as f64) < threshold)
140 .map(|(i, _)| i)
141 .collect();
142
143 if undersized.is_empty() {
144 results.push(InvariantResult::pass("balanced-clusters"));
145 } else {
146 results.push(InvariantResult::fail(
147 "balanced-clusters",
148 converge_pack::gate::Violation::new(
149 "balanced-clusters",
150 undersized.len() as f64,
151 format!("Undersized clusters: {:?}", undersized),
152 ),
153 ));
154 }
155
156 Ok(results)
157 }
158
159 fn evaluate_gate(
160 &self,
161 _plan: &ProposedPlan,
162 invariant_results: &[InvariantResult],
163 ) -> PromotionGate {
164 default_gate_evaluation(invariant_results, self.invariants())
165 }
166}