parlov_analysis/aggregation/
reducer.rs1use std::cmp::Ordering;
9
10use indexmap::IndexMap;
11
12use crate::existence::families::SignalFamily;
13
/// Direction of a piece of evidence relative to the hypothesis being scored.
///
/// Together with the signal family, the polarity forms the grouping key used
/// by the reducers in this module, and it selects which multiplier schedule
/// is applied to a group.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum EvidencePolarity {
    /// Evidence whose `signed_log_odds` is stored as a non-negative value.
    Positive,
    /// Evidence whose `signed_log_odds` is stored as a non-positive value.
    Contradictory,
}
22
23#[derive(Debug, Clone)]
28pub struct EvidenceEvent {
29 pub(crate) family: SignalFamily,
30 pub(crate) polarity: EvidencePolarity,
31 pub(crate) technique_id: String,
32 pub(crate) weight: f64,
33 pub(crate) signed_log_odds: f64,
34}
35
36impl EvidenceEvent {
37 #[must_use]
39 pub fn positive(
40 family: SignalFamily,
41 technique_id: impl Into<String>,
42 weight: f64,
43 log_odds_magnitude: f64,
44 ) -> Self {
45 Self {
46 family,
47 polarity: EvidencePolarity::Positive,
48 technique_id: technique_id.into(),
49 weight,
50 signed_log_odds: log_odds_magnitude.abs(),
51 }
52 }
53
54 #[must_use]
56 pub fn contradictory(
57 family: SignalFamily,
58 technique_id: impl Into<String>,
59 weight: f64,
60 log_odds_magnitude: f64,
61 ) -> Self {
62 Self {
63 family,
64 polarity: EvidencePolarity::Contradictory,
65 technique_id: technique_id.into(),
66 weight,
67 signed_log_odds: -log_odds_magnitude.abs(),
68 }
69 }
70}
71
/// Multipliers applied, in rank order, to the events of a
/// [`EvidencePolarity::Positive`] group. Events ranked beyond the end of the
/// schedule receive no multiplier and contribute nothing.
pub const POSITIVE_SCHEDULE: &[f64] = &[1.0, 0.5, 0.25, 0.1];

/// Multipliers applied, in rank order, to the events of a
/// [`EvidencePolarity::Contradictory`] group. Events ranked beyond the end of
/// the schedule receive no multiplier and contribute nothing.
pub const CONTRADICTORY_SCHEDULE: &[f64] = &[1.0, 0.7, 0.5, 0.3, 0.1];

/// Largest absolute log-odds total a single `(family, polarity)` group may
/// contribute in `reduce_all` and `reduce_with_attribution`.
pub const PER_GROUP_CAP: f64 = 0.75;
80
81#[must_use]
87pub fn reduce_family_polarity(events: &[EvidenceEvent], schedule: &[f64], cap: f64) -> f64 {
88 if events.is_empty() || schedule.is_empty() {
89 return 0.0;
90 }
91
92 let mut sorted: Vec<&EvidenceEvent> = events.iter().collect();
93 sorted.sort_by(|a, b| cmp_event_desc(a, b));
94
95 let total: f64 = sorted
96 .iter()
97 .zip(schedule.iter())
98 .map(|(event, multiplier)| event.signed_log_odds * multiplier)
99 .sum();
100
101 clamp_magnitude(total, cap)
102}
103
104#[must_use]
109pub fn reduce_all(events: &[EvidenceEvent]) -> f64 {
110 if events.is_empty() {
111 return 0.0;
112 }
113 let mut total = 0.0;
114 for (group, polarity) in group_indices_by_family_polarity(events) {
115 let schedule = schedule_for(polarity);
116 let unclamped: f64 = sorted_indices(events, &group)
117 .into_iter()
118 .zip(schedule.iter())
119 .map(|(i, m)| events[i].signed_log_odds * m)
120 .sum();
121 total += clamp_magnitude(unclamped, PER_GROUP_CAP);
122 }
123 total
124}
125
126fn sorted_indices(events: &[EvidenceEvent], indices: &[usize]) -> Vec<usize> {
129 let mut sorted = indices.to_vec();
130 sorted.sort_by(|&a, &b| cmp_event_desc(&events[a], &events[b]));
131 sorted
132}
133
/// Outcome of `reduce_with_attribution`: the aggregate plus a per-event breakdown.
#[derive(Clone, Debug)]
pub struct ReductionResult {
    /// Sum of the capped totals of all `(family, polarity)` groups.
    pub total_log_odds: f64,
    /// One entry per input event, at the same position as that event in the
    /// input slice, holding the share of the total attributed to it.
    pub contributions: Vec<f64>,
}
143
144#[must_use]
149pub fn reduce_with_attribution(events: &[EvidenceEvent]) -> ReductionResult {
150 if events.is_empty() {
151 return ReductionResult {
152 total_log_odds: 0.0,
153 contributions: Vec::new(),
154 };
155 }
156
157 let mut contributions = vec![0.0_f64; events.len()];
158 let mut total = 0.0_f64;
159 for (group, polarity) in group_indices_by_family_polarity(events) {
160 let schedule = schedule_for(polarity);
161 let group_total = attribute_group(events, &group, schedule, &mut contributions);
162 total += group_total;
163 }
164 ReductionResult {
165 total_log_odds: total,
166 contributions,
167 }
168}
169
170fn attribute_group(
173 events: &[EvidenceEvent],
174 group_indices: &[usize],
175 schedule: &[f64],
176 contributions: &mut [f64],
177) -> f64 {
178 if group_indices.is_empty() || schedule.is_empty() {
179 return 0.0;
180 }
181
182 let sorted = sorted_indices(events, group_indices);
183 let unclamped: f64 = sorted
184 .iter()
185 .zip(schedule.iter())
186 .map(|(&i, m)| events[i].signed_log_odds * m)
187 .sum();
188 let clamped = clamp_magnitude(unclamped, PER_GROUP_CAP);
189 let scale = if (clamped - unclamped).abs() <= f64::EPSILON || unclamped.abs() <= f64::EPSILON {
190 1.0
191 } else {
192 clamped / unclamped
193 };
194
195 for (slot, &idx) in sorted.iter().enumerate() {
196 let multiplier = schedule.get(slot).copied().unwrap_or(0.0);
197 contributions[idx] = events[idx].signed_log_odds * multiplier * scale;
198 }
199 clamped
200}
201
202fn group_indices_by_family_polarity(
204 events: &[EvidenceEvent],
205) -> Vec<(Vec<usize>, EvidencePolarity)> {
206 let mut groups: IndexMap<(SignalFamily, EvidencePolarity), Vec<usize>> = IndexMap::new();
207 for (i, event) in events.iter().enumerate() {
208 groups
209 .entry((event.family, event.polarity))
210 .or_default()
211 .push(i);
212 }
213 groups
214 .into_iter()
215 .map(|((_, polarity), indices)| (indices, polarity))
216 .collect()
217}
218
219fn schedule_for(polarity: EvidencePolarity) -> &'static [f64] {
221 match polarity {
222 EvidencePolarity::Positive => POSITIVE_SCHEDULE,
223 EvidencePolarity::Contradictory => CONTRADICTORY_SCHEDULE,
224 }
225}
226
227fn cmp_event_desc(a: &EvidenceEvent, b: &EvidenceEvent) -> Ordering {
229 match b.weight.partial_cmp(&a.weight) {
230 Some(Ordering::Equal) | None => a.technique_id.cmp(&b.technique_id),
231 Some(other) => other,
232 }
233}
234
/// Limits `value` to the symmetric interval `[-|cap|, |cap|]`.
///
/// The sign of `cap` is ignored. A NaN `value` is returned unchanged, and a
/// NaN `cap` leaves `value` unchanged, because both comparisons are then false.
fn clamp_magnitude(value: f64, cap: f64) -> f64 {
    let limit = cap.abs();
    if value > limit {
        return limit;
    }
    if value < -limit {
        return -limit;
    }
    value
}
246
247#[cfg(test)]
248#[path = "reducer_tests.rs"]
249mod tests;