Skip to main content

dsfb_debug/
causality.rs

1//! DSFB-Debug: causality / graph attribution — root-cause stamping
2//! over the service-call graph (no_std).
3//!
4//! # Role in the operator workflow
5//!
6//! Pure-function service-graph walk. Given the per-(window, signal)
7//! evaluation grid (`SignalEvaluation` from `run_evaluation`) and a
8//! service dependency graph encoded as parent→child signal-index
9//! pairs, for each closed episode this module returns the most-
10//! upstream contributing signal as the candidate root cause. The
11//! result is written into `DebugEpisode.root_cause_signal_index`.
12//!
13//! Per panellist P11 (Senior SRE): "questions 2 and 3 of the eight
14//! load-bearing on-call questions are 'which service is the
15//! originator?' and 'what changed?'. Without graph attribution
16//! DSFB-Debug answers neither." This module is the answer.
17//!
18//! # Deterministic algorithm (Theorem 9 preserved)
19//!
20//! 1. For each episode, scan the per-(window, signal) grid over the
21//!    window range `[start_window, end_window]`.
22//! 2. Find the lexicographically-earliest `(window, signal)` pair
23//!    whose `confirmed_grammar_state >= Boundary` AND whose absolute
24//!    `sign_tuple.slew` exceeds the engine's `slew_delta` threshold.
25//!    This is the "first slew window".
26//! 3. Among the signals contributing in the first slew window, find
27//!    those whose graph-incoming edges (parent → this signal) come
28//!    from outside the contributing-signal set of the episode —
29//!    these are the upstream-most signals.
30//! 4. Return the lowest such signal index. Tie-broken by lowest
31//!    index for determinism.
32//!
33//! # Failure modes (returns `None`, never silently fabricates)
34//!
35//! - Empty graph (no edges supplied) → no attribution
36//! - Episode has fewer than 2 contributing signals → no attribution
37//!   (single-signal episodes have no upstream/downstream distinction
38//!   within the episode itself)
39//!
40//! `None` is the honest "I cannot attribute" answer; the engine
41//! never invents a root cause.
42
43#![allow(clippy::too_many_arguments)]
44
45use crate::types::*;
46
47/// Walk the service-call graph and stamp each closed episode with its
48/// most-upstream contributing signal index, if determinable.
49///
50/// `eval_out` must be the same buffer that `run_evaluation` populated.
51/// `service_graph` is a slice of `(parent_signal_idx, child_signal_idx)`
52/// pairs. Empty graph → no attribution.
53pub fn attribute_root_causes(
54    episodes_out: &mut [DebugEpisode],
55    episode_count: usize,
56    eval_out: &[SignalEvaluation],
57    num_signals: usize,
58    num_windows: usize,
59    service_graph: &[(u16, u16)],
60    slew_delta: f64,
61) {
62    if service_graph.is_empty() {
63        let mut i = 0;
64        while i < episode_count {
65            episodes_out[i].root_cause_signal_index = None;
66            i += 1;
67        }
68        return;
69    }
70
71    let mut ep_idx: usize = 0;
72    while ep_idx < episode_count {
73        episodes_out[ep_idx].root_cause_signal_index =
74            attribute_one_episode(
75                &episodes_out[ep_idx],
76                eval_out,
77                num_signals,
78                num_windows,
79                service_graph,
80                slew_delta,
81            );
82        ep_idx += 1;
83    }
84}
85
86/// Attribute a single episode. Returns `Some(signal_idx)` when a
87/// unique upstream-most contributor is determinable, `None` otherwise.
88fn attribute_one_episode(
89    episode: &DebugEpisode,
90    eval_out: &[SignalEvaluation],
91    num_signals: usize,
92    num_windows: usize,
93    service_graph: &[(u16, u16)],
94    slew_delta: f64,
95) -> Option<u16> {
96    let start_w = episode.start_window as usize;
97    let end_w = episode.end_window as usize;
98    if start_w >= num_windows {
99        return None;
100    }
101
102    // Collect contributing signals: those whose confirmed grammar state
103    // reached Boundary or higher anywhere in [start_w, end_w].
104    // Bitset over num_signals (max 512 supported here).
105    const MAX_SIG: usize = 512;
106    let mut contrib = [false; MAX_SIG];
107    if num_signals > MAX_SIG {
108        return None;
109    }
110
111    let mut w = start_w;
112    while w <= end_w && w < num_windows {
113        let mut s = 0;
114        while s < num_signals {
115            let idx = w * num_signals + s;
116            if idx < eval_out.len() {
117                let e = eval_out[idx];
118                if e.confirmed_grammar_state >= GrammarState::Boundary {
119                    contrib[s] = true;
120                }
121            }
122            s += 1;
123        }
124        w += 1;
125    }
126
127    // Count contributors. If fewer than 2, attribution is moot.
128    let mut contrib_count = 0;
129    let mut s = 0;
130    while s < num_signals {
131        if contrib[s] {
132            contrib_count += 1;
133        }
134        s += 1;
135    }
136    if contrib_count < 2 {
137        return None;
138    }
139
140    // Find the first-slew (window, signal) pair: earliest window with
141    // any contributor whose absolute slew exceeds slew_delta. Among
142    // those, lowest signal index for determinism.
143    let mut first_slew_signal: Option<u16> = None;
144    let mut w = start_w;
145    while w <= end_w && w < num_windows && first_slew_signal.is_none() {
146        let mut s = 0;
147        while s < num_signals {
148            let idx = w * num_signals + s;
149            if idx < eval_out.len() && contrib[s] {
150                let e = eval_out[idx];
151                let slew_abs = if e.sign_tuple.slew >= 0.0 {
152                    e.sign_tuple.slew
153                } else {
154                    -e.sign_tuple.slew
155                };
156                if slew_abs > slew_delta {
157                    first_slew_signal = Some(s as u16);
158                    break;
159                }
160            }
161            s += 1;
162        }
163        w += 1;
164    }
165    let _first = first_slew_signal?;
166
167    // Find upstream-most contributors: those whose ALL graph-incoming
168    // edges (parent → this signal) come from signals NOT in the
169    // contributing set. These are the boundary-of-the-cascade signals.
170    // Among them, lowest index wins.
171    let mut upstream_most: Option<u16> = None;
172    let mut s = 0_u16;
173    while (s as usize) < num_signals {
174        if contrib[s as usize] {
175            let mut has_internal_parent = false;
176            let mut has_any_parent = false;
177            let mut g = 0;
178            while g < service_graph.len() {
179                let (parent, child) = service_graph[g];
180                if child == s {
181                    has_any_parent = true;
182                    if (parent as usize) < num_signals && contrib[parent as usize] {
183                        has_internal_parent = true;
184                        break;
185                    }
186                }
187                g += 1;
188            }
189            // Upstream-most: has either no parent in graph (root of
190            // service tree) or has parents only OUTSIDE the contributing
191            // set. The presence of a parent outside the contributing
192            // set means the cascade entered through this signal.
193            if !has_internal_parent {
194                let _ = has_any_parent; // intentionally unused — both
195                                         // graph-roots and external-parent
196                                         // boundary signals qualify
197                if upstream_most.is_none() {
198                    upstream_most = Some(s);
199                }
200            }
201        }
202        s += 1;
203    }
204    upstream_most
205}
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210
211    fn ev(window: u64, signal: u16, state: GrammarState, slew: f64) -> SignalEvaluation {
212        SignalEvaluation {
213            window_index: window,
214            signal_index: signal,
215            residual_value: 0.0,
216            sign_tuple: SignTuple { norm: 0.0, drift: 0.0, slew },
217            raw_grammar_state: state,
218            confirmed_grammar_state: state,
219            reason_code: ReasonCode::AbruptSlewViolation,
220            motif: None,
221            semantic_disposition: SemanticDisposition::Unknown,
222            dsa_score: 0.0,
223            policy_state: PolicyState::Review,
224            was_imputed: false,
225            drift_persistence: 0.0,
226        }
227    }
228
229    fn blank_ep(start: u64, end: u64, contrib: u16) -> DebugEpisode {
230        DebugEpisode {
231            episode_id: 0,
232            start_window: start,
233            end_window: end,
234            peak_grammar_state: GrammarState::Boundary,
235            primary_reason_code: ReasonCode::AbruptSlewViolation,
236            matched_motif: SemanticDisposition::Unknown,
237            policy_state: PolicyState::Review,
238            contributing_signal_count: contrib,
239            structural_signature: StructuralSignature {
240                dominant_drift_direction: DriftDirection::Positive,
241                peak_slew_magnitude: 0.5,
242                duration_windows: end - start + 1,
243                signal_correlation: contrib as f64 / 4.0,
244            },
245            root_cause_signal_index: None,
246        }
247    }
248
249    #[test]
250    fn attributes_linear_chain_root_to_signal_zero() {
251        // Service graph: s0 -> s1 -> s2 -> s3 (linear chain).
252        // All four contribute; first slew on s0 then propagates.
253        let graph: &[(u16, u16)] = &[(0, 1), (1, 2), (2, 3)];
254        let num_signals = 4;
255        let num_windows = 6;
256        let n = num_signals * num_windows;
257        let mut eval = [SignalEvaluation {
258            window_index: 0, signal_index: 0, residual_value: 0.0,
259            sign_tuple: SignTuple::ZERO,
260            raw_grammar_state: GrammarState::Admissible,
261            confirmed_grammar_state: GrammarState::Admissible,
262            reason_code: ReasonCode::Admissible,
263            motif: None, semantic_disposition: SemanticDisposition::Unknown,
264            dsa_score: 0.0, policy_state: PolicyState::Silent, was_imputed: false,
265            drift_persistence: 0.0,
266        }; 64];
267        // Slew on s0 first (window 0), then s1 (window 1), s2 (window 2), s3 (window 3).
268        // num_signals = 4 → row offsets are 0, 4, 8, 12.
269        eval[0] = ev(0, 0, GrammarState::Boundary, 0.9);
270        eval[5] = ev(1, 1, GrammarState::Boundary, 0.8);
271        eval[10] = ev(2, 2, GrammarState::Boundary, 0.7);
272        eval[15] = ev(3, 3, GrammarState::Boundary, 0.6);
273        let _ = n;
274
275        let mut episodes = [blank_ep(0, 4, 4); 1];
276        attribute_root_causes(&mut episodes, 1, &eval, num_signals, num_windows, graph, 0.1);
277
278        assert_eq!(episodes[0].root_cause_signal_index, Some(0),
279                   "linear-chain cascade should attribute root to s0");
280    }
281
282    #[test]
283    fn empty_graph_yields_none() {
284        let mut eval = [SignalEvaluation {
285            window_index: 0, signal_index: 0, residual_value: 0.0,
286            sign_tuple: SignTuple::ZERO,
287            raw_grammar_state: GrammarState::Admissible,
288            confirmed_grammar_state: GrammarState::Admissible,
289            reason_code: ReasonCode::Admissible,
290            motif: None, semantic_disposition: SemanticDisposition::Unknown,
291            dsa_score: 0.0, policy_state: PolicyState::Silent, was_imputed: false,
292            drift_persistence: 0.0,
293        }; 16];
294        eval[0] = ev(0, 0, GrammarState::Boundary, 0.5);
295        eval[1] = ev(0, 1, GrammarState::Boundary, 0.5);
296        let mut episodes = [blank_ep(0, 1, 2); 1];
297        attribute_root_causes(&mut episodes, 1, &eval, 4, 4, &[], 0.1);
298        assert_eq!(episodes[0].root_cause_signal_index, None,
299                   "empty graph must not produce attribution");
300    }
301
302    #[test]
303    fn single_signal_episode_yields_none() {
304        // One contributor → attribution is moot.
305        let graph: &[(u16, u16)] = &[(0, 1)];
306        let mut eval = [SignalEvaluation {
307            window_index: 0, signal_index: 0, residual_value: 0.0,
308            sign_tuple: SignTuple::ZERO,
309            raw_grammar_state: GrammarState::Admissible,
310            confirmed_grammar_state: GrammarState::Admissible,
311            reason_code: ReasonCode::Admissible,
312            motif: None, semantic_disposition: SemanticDisposition::Unknown,
313            dsa_score: 0.0, policy_state: PolicyState::Silent, was_imputed: false,
314            drift_persistence: 0.0,
315        }; 16];
316        eval[0] = ev(0, 0, GrammarState::Boundary, 0.5);
317        let mut episodes = [blank_ep(0, 1, 1); 1];
318        attribute_root_causes(&mut episodes, 1, &eval, 4, 4, graph, 0.1);
319        assert_eq!(episodes[0].root_cause_signal_index, None,
320                   "single-signal episode must not produce attribution");
321    }
322}