Skip to main content

dsfb_gray/
heuristics.rs

1//! Heuristics bank: typed degradation motifs with Rust-specific provenance.
2//!
3//! The heuristics bank is the semantic layer of the DSFB engine. It maps
4//! residual sign patterns (drift direction, slew shape, source channel
5//! correlation) to named reason codes with human-readable provenance.
6//!
7//! ## Design Decision: Finite and Versioned
8//!
9//! The heuristics bank is explicitly finite. Novel patterns not represented
10//! in the bank produce `UnclassifiedStructuralAnomaly` — the system admits
11//! what it does not know rather than fabricating an explanation.
12//!
13//! ## Failure Mode FM-07: Heuristics Bank Incompleteness
14//!
15//! Novel failure modes not in the bank produce UnclassifiedStructuralAnomaly.
16//! This is by design. The bank is versioned and extensible.
17
18use crate::grammar::GrammarState;
19use crate::residual::{ResidualSign, ResidualSource};
20use crate::ReasonCode;
21
/// Number of slots available in a [`StaticPriorSet`].
const MAX_STATIC_PRIORS: usize = 16;
23
/// Static string key that names one heuristic entry (e.g., `"H-RAFT-01"`).
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct HeuristicId(pub &'static str);
27
28/// A single entry in the heuristics bank.
29///
30/// Each entry encodes a specific structural pattern recognized by experienced
31/// Rust/distributed-systems engineers, formalized into a deterministic matching
32/// rule.
33#[derive(Debug, Clone, Copy)]
34pub struct HeuristicEntry {
35    /// Unique identifier (e.g., "H-RAFT-01").
36    pub id: HeuristicId,
37    /// Which residual source(s) this heuristic applies to.
38    pub primary_source: ResidualSource,
39    /// Minimum drift magnitude to trigger (absolute value).
40    pub drift_threshold: f64,
41    /// Minimum slew magnitude to trigger (absolute value). Use 0.0 for drift-only.
42    pub slew_threshold: f64,
43    /// Whether drift must be positive (true) or magnitude-only (false).
44    pub drift_positive_required: bool,
45    /// Reason code emitted when this heuristic matches.
46    pub reason_code: ReasonCode,
47    /// Human-readable description of the pattern.
48    pub description: &'static str,
49    /// Rust-specific provenance: what real-world Rust pattern produces this.
50    pub provenance: &'static str,
51}
52
53/// Result of matching a residual sign against the heuristics bank.
54#[derive(Debug, Clone)]
55pub struct MatchResult {
56    /// The reason code from the best-matching heuristic, or
57    /// `UnclassifiedStructuralAnomaly` if no match.
58    pub reason_code: ReasonCode,
59    /// The heuristic ID that matched, if any.
60    pub matched_heuristic: Option<HeuristicId>,
61    /// Match confidence: how strongly the residual sign matches the pattern.
62    /// 0.0 = no match, 1.0 = exact match. Based on how far past thresholds.
63    pub confidence: f64,
64    /// Human-readable description of the matched structural pattern.
65    pub description: &'static str,
66    /// Rust-specific provenance of the matched structural pattern.
67    pub provenance: &'static str,
68    /// Static prior applied to the winning heuristic, if any.
69    pub applied_prior: Option<AppliedStaticPrior>,
70}
71
72/// Static prior for one heuristic, typically sourced from the crate scanner.
73///
74/// The scales are bounded threshold multipliers. Values lower than `1.0`
75/// make the heuristic easier to trigger, while values above `1.0` make it
76/// harder to trigger. The observer clamps them to a safe range.
77#[derive(Debug, Clone, Copy, PartialEq)]
78pub struct StaticPrior {
79    /// Heuristic to which the prior applies.
80    pub heuristic_id: HeuristicId,
81    /// Confidence assigned by the static scanner or caller, in `[0.0, 1.0]`.
82    pub confidence: f64,
83    /// Drift-threshold scale factor.
84    pub drift_scale: f64,
85    /// Slew-threshold scale factor.
86    pub slew_scale: f64,
87}
88
89impl StaticPrior {
90    /// Create a new static prior with bounded confidence and scales.
91    pub fn new(
92        heuristic_id: HeuristicId,
93        confidence: f64,
94        drift_scale: f64,
95        slew_scale: f64,
96    ) -> Self {
97        Self {
98            heuristic_id,
99            confidence: confidence.clamp(0.0, 1.0),
100            drift_scale: drift_scale.clamp(0.5, 2.0),
101            slew_scale: slew_scale.clamp(0.5, 2.0),
102        }
103    }
104}
105
106/// Static prior actually applied during one heuristic match.
107#[derive(Debug, Clone, Copy, PartialEq)]
108pub struct AppliedStaticPrior {
109    /// Heuristic to which the prior was applied.
110    pub heuristic_id: HeuristicId,
111    /// Prior confidence used for this match.
112    pub confidence: f64,
113    /// Effective drift-threshold scale used for this match.
114    pub drift_scale: f64,
115    /// Effective slew-threshold scale used for this match.
116    pub slew_scale: f64,
117}
118
119/// Fixed-capacity collection of static heuristic priors.
120///
121/// This type is `no_std` and `no_alloc` friendly so it can be carried into the
122/// core observer without introducing heap allocation.
123#[derive(Debug, Clone, Copy)]
124pub struct StaticPriorSet {
125    priors: [Option<StaticPrior>; MAX_STATIC_PRIORS],
126    len: usize,
127}
128
129impl StaticPriorSet {
130    /// Create an empty prior set.
131    pub const fn new() -> Self {
132        Self {
133            priors: [None; MAX_STATIC_PRIORS],
134            len: 0,
135        }
136    }
137
138    /// Add or replace one prior. If the set is full, the last slot is reused.
139    pub fn with_prior(mut self, prior: StaticPrior) -> Self {
140        if let Some(existing) = self
141            .priors
142            .iter_mut()
143            .flatten()
144            .find(|existing| existing.heuristic_id == prior.heuristic_id)
145        {
146            *existing = prior;
147            return self;
148        }
149
150        if self.len < MAX_STATIC_PRIORS {
151            self.priors[self.len] = Some(prior);
152            self.len += 1;
153        } else if let Some(slot) = self.priors.last_mut() {
154            *slot = Some(prior);
155        }
156        self
157    }
158
159    /// Return the prior for one heuristic, if present.
160    pub fn get(&self, heuristic_id: HeuristicId) -> Option<StaticPrior> {
161        self.priors
162            .iter()
163            .flatten()
164            .find(|prior| prior.heuristic_id == heuristic_id)
165            .copied()
166    }
167
168    /// Number of configured priors.
169    pub fn len(&self) -> usize {
170        self.len
171    }
172
173    /// Whether the set is empty.
174    pub fn is_empty(&self) -> bool {
175        self.len == 0
176    }
177}
178
179impl Default for StaticPriorSet {
180    fn default() -> Self {
181        Self::new()
182    }
183}
184
185/// The heuristics bank: a fixed-size collection of typed degradation motifs.
186///
187/// Version 1.0: 12 entries covering the primary distributed Rust system
188/// failure patterns. The bank is finite and versioned — novel patterns
189/// produce `UnclassifiedStructuralAnomaly`.
190pub struct HeuristicsBank {
191    entries: &'static [HeuristicEntry],
192}
193
194/// Default heuristics bank with Rust-specific entries.
195pub const DEFAULT_ENTRIES: &[HeuristicEntry] = &[
196    HeuristicEntry {
197        id: HeuristicId("H-ALLOC-01"),
198        primary_source: ResidualSource::MemoryUsage,
199        drift_threshold: 0.05,
200        slew_threshold: 0.02,
201        drift_positive_required: true,
202        reason_code: ReasonCode::MemoryPressureEscalation,
203        description:
204            "Monotonic increase in allocation latency with step-change at capacity doubling",
205        provenance: "Vec<T> capacity doubling in hot loop; jemalloc arena exhaustion",
206    },
207    HeuristicEntry {
208        id: HeuristicId("H-LOCK-01"),
209        primary_source: ResidualSource::Latency,
210        drift_threshold: 0.03,
211        slew_threshold: 0.01,
212        drift_positive_required: true,
213        reason_code: ReasonCode::SustainedLatencyDrift,
214        description: "Gradual increase in write-hold duration with burst at reader-count threshold",
215        provenance: "tokio::sync::RwLock under read-heavy → write-heavy transition",
216    },
217    HeuristicEntry {
218        id: HeuristicId("H-RAFT-01"),
219        primary_source: ResidualSource::HeartbeatRtt,
220        drift_threshold: 0.04,
221        slew_threshold: 0.0,
222        drift_positive_required: true,
223        reason_code: ReasonCode::ConsensusHeartbeatDegradation,
224        description: "Increasing RTT to one follower drifting toward election timeout",
225        provenance: "openraft follower with injected clock drift approaching election_timeout_ms",
226    },
227    HeuristicEntry {
228        id: HeuristicId("H-ASYNC-01"),
229        primary_source: ResidualSource::PollDuration,
230        drift_threshold: 0.02,
231        slew_threshold: 0.0,
232        drift_positive_required: true,
233        reason_code: ReasonCode::AsyncRuntimeStarvation,
234        description: "Gradual increase in poll time indicating blocking in async context",
235        provenance: "Blocking operation in async context; tokio runtime starvation",
236    },
237    HeuristicEntry {
238        id: HeuristicId("H-TCP-01"),
239        primary_source: ResidualSource::Latency,
240        drift_threshold: 0.06,
241        slew_threshold: 0.03,
242        drift_positive_required: true,
243        reason_code: ReasonCode::PartialPartitionSignature,
244        description: "Burst of retransmits followed by drift in RTT variance",
245        provenance: "Partial network partition; selective packet loss on specific routes",
246    },
247    HeuristicEntry {
248        id: HeuristicId("H-CHAN-01"),
249        primary_source: ResidualSource::QueueDepth,
250        drift_threshold: 0.05,
251        slew_threshold: 0.02,
252        drift_positive_required: true,
253        reason_code: ReasonCode::ChannelBackpressureOnset,
254        description: "Growing queue depth with drift-then-slew at backpressure onset",
255        provenance: "tokio::sync::mpsc bounded channel approaching capacity",
256    },
257    HeuristicEntry {
258        id: HeuristicId("H-CLOCK-01"),
259        primary_source: ResidualSource::HeartbeatRtt,
260        drift_threshold: 0.02,
261        slew_threshold: 0.0,
262        drift_positive_required: false,
263        reason_code: ReasonCode::ClockDriftDivergence,
264        description: "Monotonic drift in timestamp-derived residuals between nodes",
265        provenance: "TSC vs HPET clock source discrepancy between cluster nodes",
266    },
267    HeuristicEntry {
268        id: HeuristicId("H-THRU-01"),
269        primary_source: ResidualSource::Throughput,
270        drift_threshold: 0.03,
271        slew_threshold: 0.0,
272        drift_positive_required: false,
273        reason_code: ReasonCode::ThroughputDegradation,
274        description: "Persistent throughput decline not attributable to workload reduction",
275        provenance: "Resource contention from co-located process; IO scheduler starvation",
276    },
277    HeuristicEntry {
278        id: HeuristicId("H-SERDE-01"),
279        primary_source: ResidualSource::SerdeLatency,
280        drift_threshold: 0.04,
281        slew_threshold: 0.02,
282        drift_positive_required: true,
283        reason_code: ReasonCode::SerializationDrift,
284        description: "Serialization latency increasing with step-change at schema boundary",
285        provenance: "serde deserialization with growing payload; schema migration overhead",
286    },
287    HeuristicEntry {
288        id: HeuristicId("H-GRPC-01"),
289        primary_source: ResidualSource::FlowControlWindow,
290        drift_threshold: 0.05,
291        slew_threshold: 0.03,
292        drift_positive_required: true,
293        reason_code: ReasonCode::FlowControlExhaustion,
294        description: "Flow control window approaching exhaustion with drift-then-violation",
295        provenance: "tonic stream backpressure; h2 flow control window starvation",
296    },
297    HeuristicEntry {
298        id: HeuristicId("H-DNS-01"),
299        primary_source: ResidualSource::DnsLatency,
300        drift_threshold: 0.03,
301        slew_threshold: 0.01,
302        drift_positive_required: true,
303        reason_code: ReasonCode::SustainedLatencyDrift,
304        description: "DNS resolution time increasing with step-change at cache expiry",
305        provenance: "trust-dns resolver cache poisoning or upstream resolver degradation",
306    },
307    HeuristicEntry {
308        id: HeuristicId("H-ERR-01"),
309        primary_source: ResidualSource::ErrorRate,
310        drift_threshold: 0.02,
311        slew_threshold: 0.01,
312        drift_positive_required: true,
313        reason_code: ReasonCode::SustainedLatencyDrift,
314        description: "Error rate growing monotonically with acceleration at saturation",
315        provenance: "Connection pool exhaustion; timeout cascade in microservice chain",
316    },
317];
318
319impl HeuristicsBank {
320    /// Create a bank with the default Rust distributed-systems entries.
321    pub fn default_bank() -> Self {
322        Self {
323            entries: DEFAULT_ENTRIES,
324        }
325    }
326
327    /// Create a bank with custom entries.
328    pub fn custom(entries: &'static [HeuristicEntry]) -> Self {
329        Self { entries }
330    }
331
332    /// Match a residual sign against the bank.
333    ///
334    /// Returns the best-matching heuristic (highest confidence) or
335    /// `UnclassifiedStructuralAnomaly` if no entry matches.
336    ///
337    /// Only matches when the grammar state is `Boundary` or `Violation`.
338    /// In `Admissible` state, returns `NoAnomaly`.
339    pub fn match_sign(&self, sign: &ResidualSign, grammar_state: GrammarState) -> MatchResult {
340        self.match_sign_with_priors(sign, grammar_state, &StaticPriorSet::default())
341    }
342
343    /// Match a residual sign against the bank using optional static priors.
344    ///
345    /// Static priors do not force a detection. They only apply bounded
346    /// threshold scaling to the candidate heuristic they target.
347    pub fn match_sign_with_priors(
348        &self,
349        sign: &ResidualSign,
350        grammar_state: GrammarState,
351        priors: &StaticPriorSet,
352    ) -> MatchResult {
353        if grammar_state == GrammarState::Admissible {
354            return no_anomaly_match();
355        }
356
357        let mut best_match: Option<(&HeuristicEntry, f64, Option<AppliedStaticPrior>)> = None;
358
359        for entry in self.entries.iter() {
360            if let Some(candidate) = evaluate_heuristic_entry(entry, sign, priors) {
361                match best_match {
362                    None => best_match = Some(candidate),
363                    Some((_, best_conf, _)) if candidate.1 > best_conf => {
364                        best_match = Some(candidate)
365                    }
366                    Some((_, best_conf, _)) if candidate.1 <= best_conf => {}
367                    Some((_, _, _)) => {}
368                }
369            }
370        }
371
372        match best_match {
373            Some((entry, confidence, applied_prior)) => {
374                matched_heuristic_result(entry, confidence, applied_prior)
375            }
376            None => unmatched_anomaly_result(),
377        }
378    }
379
380    /// Number of entries in the bank.
381    pub fn len(&self) -> usize {
382        self.entries.len()
383    }
384
385    /// Whether the bank is empty.
386    pub fn is_empty(&self) -> bool {
387        self.entries.is_empty()
388    }
389
390    /// Bank version identifier.
391    pub fn version(&self) -> &'static str {
392        "1.0.0"
393    }
394}
395
396fn no_anomaly_match() -> MatchResult {
397    MatchResult {
398        reason_code: ReasonCode::NoAnomaly,
399        matched_heuristic: None,
400        confidence: 0.0,
401        description: "No structural anomaly detected",
402        provenance: "Grammar state remained Admissible",
403        applied_prior: None,
404    }
405}
406
407fn evaluate_heuristic_entry(
408    entry: &'static HeuristicEntry,
409    sign: &ResidualSign,
410    priors: &StaticPriorSet,
411) -> Option<(&'static HeuristicEntry, f64, Option<AppliedStaticPrior>)> {
412    if entry.primary_source != sign.source {
413        return None;
414    }
415
416    let applied_prior = applied_prior_for_entry(entry, priors);
417    let (effective_drift_threshold, effective_slew_threshold) =
418        effective_thresholds(entry, applied_prior.as_ref());
419    if !drift_threshold_matches(entry, sign, effective_drift_threshold) {
420        return None;
421    }
422
423    Some((
424        entry,
425        confidence_for_match(sign, effective_drift_threshold, effective_slew_threshold),
426        applied_prior,
427    ))
428}
429
430fn applied_prior_for_entry(
431    entry: &HeuristicEntry,
432    priors: &StaticPriorSet,
433) -> Option<AppliedStaticPrior> {
434    priors.get(entry.id).map(|prior| AppliedStaticPrior {
435        heuristic_id: prior.heuristic_id,
436        confidence: prior.confidence,
437        drift_scale: prior.drift_scale,
438        slew_scale: prior.slew_scale,
439    })
440}
441
442fn effective_thresholds(
443    entry: &HeuristicEntry,
444    applied_prior: Option<&AppliedStaticPrior>,
445) -> (f64, f64) {
446    let drift_scale = applied_prior.map(|prior| prior.drift_scale).unwrap_or(1.0);
447    let slew_scale = applied_prior.map(|prior| prior.slew_scale).unwrap_or(1.0);
448    (
449        entry.drift_threshold * drift_scale,
450        entry.slew_threshold * slew_scale,
451    )
452}
453
454fn drift_threshold_matches(entry: &HeuristicEntry, sign: &ResidualSign, threshold: f64) -> bool {
455    let drift_abs = sign.drift.abs();
456    drift_abs >= threshold && !(entry.drift_positive_required && sign.drift < 0.0)
457}
458
459fn confidence_for_match(
460    sign: &ResidualSign,
461    effective_drift_threshold: f64,
462    effective_slew_threshold: f64,
463) -> f64 {
464    let drift_confidence = (sign.drift.abs() / effective_drift_threshold.max(1e-12)).min(3.0) / 3.0;
465    let slew_confidence = if effective_slew_threshold > 0.0 {
466        let slew_abs = sign.slew.abs();
467        if slew_abs < effective_slew_threshold {
468            0.3
469        } else {
470            (slew_abs / effective_slew_threshold.max(1e-12)).min(3.0) / 3.0
471        }
472    } else {
473        0.5
474    };
475
476    (drift_confidence + slew_confidence) / 2.0
477}
478
479fn matched_heuristic_result(
480    entry: &HeuristicEntry,
481    confidence: f64,
482    applied_prior: Option<AppliedStaticPrior>,
483) -> MatchResult {
484    MatchResult {
485        reason_code: entry.reason_code,
486        matched_heuristic: Some(entry.id),
487        confidence,
488        description: entry.description,
489        provenance: entry.provenance,
490        applied_prior,
491    }
492}
493
494fn unmatched_anomaly_result() -> MatchResult {
495    MatchResult {
496        reason_code: ReasonCode::UnclassifiedStructuralAnomaly,
497        matched_heuristic: None,
498        confidence: 0.0,
499        description: "Structural anomaly detected; no heuristic match",
500        provenance: "Grammar state transitioned but no bank entry satisfied its thresholds",
501        applied_prior: None,
502    }
503}
504
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a sign on `source` with the given drift and slew; the residual
    /// and timestamp are fixed because the matcher does not read them.
    fn make_sign(source: ResidualSource, drift: f64, slew: f64) -> ResidualSign {
        ResidualSign {
            source,
            drift,
            slew,
            residual: 5.0,
            timestamp_ns: 0,
        }
    }

    #[test]
    fn test_admissible_returns_no_anomaly() {
        let sign = make_sign(ResidualSource::Latency, 0.5, 0.1);
        let result = HeuristicsBank::default_bank().match_sign(&sign, GrammarState::Admissible);
        assert_eq!(result.reason_code, ReasonCode::NoAnomaly);
    }

    #[test]
    fn test_heartbeat_drift_matches_raft_heuristic() {
        let sign = make_sign(ResidualSource::HeartbeatRtt, 0.1, 0.0);
        let result = HeuristicsBank::default_bank().match_sign(&sign, GrammarState::Boundary);

        // Two entries listen on HeartbeatRtt; either is an acceptable winner.
        let accepted = [
            ReasonCode::ConsensusHeartbeatDegradation,
            ReasonCode::ClockDriftDivergence,
        ];
        assert!(accepted.contains(&result.reason_code));
        assert!(result.matched_heuristic.is_some());
    }

    #[test]
    fn test_unmatched_source_returns_unclassified() {
        // No entry in the default bank targets a custom source.
        let sign = make_sign(ResidualSource::Custom("unknown"), 0.5, 0.3);
        let result = HeuristicsBank::default_bank().match_sign(&sign, GrammarState::Violation);
        assert_eq!(
            result.reason_code,
            ReasonCode::UnclassifiedStructuralAnomaly
        );
    }

    #[test]
    fn test_bank_has_12_entries() {
        assert_eq!(HeuristicsBank::default_bank().len(), 12);
    }
}