Skip to main content

objects/object/
risk_signal.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Typed risk signals computed against a state and persisted alongside it.
3//!
4//! Computation is pure (`(prior_state, new_state, repo_config) -> Vec<RiskSignal>`)
5//! and lives in `crates/state_review/`. This module owns only the shape: what
6//! a signal is, how it serializes on disk, and the validation rules.
7//!
8//! The full set of fired signals is stored on the state. Tick budgeting (which
9//! signals to surface in the review UI) happens at render time and is never
10//! baked into storage — see [`state_review::budget`].
11//!
12//! Wire encoding: rmp-serde MessagePack. Format version is `1`. New optional
13//! fields are appended at the tail of [`RiskSignal`] with `#[serde(default)]`,
14//! matching the convention used elsewhere in the object model.
15
16use serde::{Deserialize, Serialize};
17
18use crate::object::hash::ChangeId;
19
/// Maximum length of [`RiskSignal::reason`], in bytes.
///
/// The reason is meant to be a single sentence, surfaced in tight gutter UI.
/// Keeping the cap at 200 forces producers to be specific and prevents the
/// review payload from ballooning when many signals fire.
///
/// Enforced by [`RiskSignal::validate`], which compares this cap against
/// `reason.len()` — the UTF-8 byte length, not the character count — and
/// returns [`RiskSignalError::ReasonTooLong`] when it is exceeded.
pub const MAX_REASON_LEN: usize = 200;
26
27/// Top-level encoded blob. Stored under a [`ContentHash`] referenced from
28/// [`State::risk_signals`]. A blob with `format_version > FORMAT_VERSION` is
29/// rejected; older versions are read with the missing-field defaults.
30#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
31pub struct RiskSignalBlob {
32    pub format_version: u8,
33    pub signals: Vec<RiskSignal>,
34}
35
// Wires `RiskSignalBlob` into the shared versioned-MessagePack machinery.
//
// NOTE(review): the macro is defined outside this file. The tests below call
// `RiskSignalBlob::new`, `encode`, `decode`, `validate` and
// `RiskSignalBlob::FORMAT_VERSION`, none of which are written out here, so
// they are presumably generated by this invocation — confirm against the
// macro definition. Likewise `error`/`codec_err` presumably name the error
// type and the variant used to wrap codec failures
// (`RiskSignalError::Encoding`), and `version` the current format version.
versioned_msgpack_blob! {
    blob: RiskSignalBlob,
    item: RiskSignal,
    field: signals,
    error: RiskSignalError,
    codec_err: Encoding,
    version: 1,
}
44
45#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
46pub struct RiskSignal {
47    pub kind: RiskSignalKind,
48    pub anchor: SignalAnchor,
49    pub reason: String,
50    pub producer: ProducerId,
51    /// Unix epoch seconds.
52    pub computed_at: i64,
53    /// Optional state this signal was computed against. Useful for retracing
54    /// when a signal moves between renders (e.g., anchor travel after a
55    /// rename).
56    #[serde(default)]
57    pub computed_against: Option<ChangeId>,
58}
59
60impl RiskSignal {
61    pub fn validate(&self) -> Result<(), RiskSignalError> {
62        if self.reason.is_empty() {
63            return Err(RiskSignalError::EmptyReason);
64        }
65        if self.reason.len() > MAX_REASON_LEN {
66            return Err(RiskSignalError::ReasonTooLong {
67                len: self.reason.len(),
68                max: MAX_REASON_LEN,
69            });
70        }
71        self.anchor.validate()?;
72        self.producer.validate()?;
73        Ok(())
74    }
75
76    /// Stable canonical anchor string used to group signals on the same anchor
77    /// during render-time budgeting. The format is intentionally simple so
78    /// budgeting comparisons are cheap and order-independent.
79    pub fn anchor_key(&self) -> String {
80        self.anchor.canonical()
81    }
82}
83
84/// Why a signal fired. Variants are wire-stable; new variants are appended.
85#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
86#[serde(rename_all = "snake_case")]
87pub enum RiskSignalKind {
88    /// New control-flow shape that doesn't appear elsewhere in the repo.
89    Novelty,
90    /// No test in the repo statically reaches the changed symbol.
91    /// Reasoning text *must* clarify this is static reachability via
92    /// tree-sitter, not runtime coverage.
93    TestReachability,
94    /// New code structurally diverges from local exemplars (sibling
95    /// functions or the prior version of the same symbol).
96    PatternDeviation,
97    /// An invariant or `enforces`-tagged annotation lives on the changed
98    /// symbol.
99    InvariantAdjacency,
100    /// Agent flagged uncertainty about its own output. Passthrough from
101    /// the captured state's provenance.
102    SelfFlaggedUncertainty,
103}
104
105impl RiskSignalKind {
106    pub fn as_str(&self) -> &'static str {
107        match self {
108            Self::Novelty => "novelty",
109            Self::TestReachability => "test_reachability",
110            Self::PatternDeviation => "pattern_deviation",
111            Self::InvariantAdjacency => "invariant_adjacency",
112            Self::SelfFlaggedUncertainty => "self_flagged_uncertainty",
113        }
114    }
115
116    /// Render-time priority. Lower numbers surface first when budgeting.
117    /// See `state_review::budget` for the full algorithm.
118    ///
119    /// The order is load-bearing: changing it changes which signals reviewers
120    /// see first when many fire on the same state. If you bump these numbers,
121    /// update the budgeting test goldens too.
122    pub fn priority_rank(&self) -> u8 {
123        match self {
124            Self::InvariantAdjacency => 0,
125            Self::SelfFlaggedUncertainty => 1,
126            Self::PatternDeviation => 2,
127            Self::Novelty => 3,
128            Self::TestReachability => 4,
129        }
130    }
131}
132
133/// Where in the change a signal fires. Symbol-level is preferred — symbols
134/// are durable across renames; line ranges are computed at fire time and
135/// drift as code is reformatted.
136#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
137pub struct SignalAnchor {
138    pub file: String,
139    #[serde(default)]
140    pub symbol: Option<String>,
141    #[serde(default)]
142    pub line_range: Option<(u32, u32)>,
143}
144
145impl SignalAnchor {
146    pub fn file(file: impl Into<String>) -> Self {
147        Self {
148            file: file.into(),
149            symbol: None,
150            line_range: None,
151        }
152    }
153
154    pub fn symbol(file: impl Into<String>, symbol: impl Into<String>) -> Self {
155        Self {
156            file: file.into(),
157            symbol: Some(symbol.into()),
158            line_range: None,
159        }
160    }
161
162    pub fn with_line_range(mut self, start: u32, end: u32) -> Self {
163        self.line_range = Some((start, end));
164        self
165    }
166
167    pub fn validate(&self) -> Result<(), RiskSignalError> {
168        if self.file.is_empty() {
169            return Err(RiskSignalError::EmptyAnchorFile);
170        }
171        if let Some((start, end)) = self.line_range
172            && start > end
173        {
174            return Err(RiskSignalError::InvalidLineRange(start, end));
175        }
176        Ok(())
177    }
178
179    /// Stable canonical form `<file>[:symbol][:start-end]` for grouping.
180    pub fn canonical(&self) -> String {
181        let mut s = self.file.clone();
182        if let Some(symbol) = &self.symbol {
183            s.push(':');
184            s.push_str(symbol);
185        }
186        if let Some((start, end)) = self.line_range {
187            s.push(':');
188            s.push_str(&format!("{start}-{end}"));
189        }
190        s
191    }
192}
193
194/// Identifies the producer that fired this signal. The `version` lets
195/// budgeting and signal-health surfaces age out signals from old producer
196/// versions without re-running computation — important when we tune a
197/// producer's heuristics and want to compare apples to apples.
198#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
199pub struct ProducerId {
200    pub module: String,
201    pub version: u32,
202}
203
204impl ProducerId {
205    pub fn new(module: impl Into<String>, version: u32) -> Self {
206        Self {
207            module: module.into(),
208            version,
209        }
210    }
211
212    pub fn validate(&self) -> Result<(), RiskSignalError> {
213        if self.module.is_empty() {
214            return Err(RiskSignalError::EmptyProducerModule);
215        }
216        Ok(())
217    }
218}
219
220#[derive(Debug, thiserror::Error)]
221pub enum RiskSignalError {
222    #[error("unsupported risk signal blob version {0}")]
223    UnsupportedVersion(u8),
224    #[error("risk signal reason must not be empty")]
225    EmptyReason,
226    #[error("risk signal reason too long ({len} bytes, max {max})")]
227    ReasonTooLong { len: usize, max: usize },
228    #[error("risk signal anchor must reference a non-empty file")]
229    EmptyAnchorFile,
230    #[error("risk signal line range start {0} exceeds end {1}")]
231    InvalidLineRange(u32, u32),
232    #[error("risk signal producer module must not be empty")]
233    EmptyProducerModule,
234    #[error("risk signal blob encoding error: {0}")]
235    Encoding(String),
236}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a valid, symbol-anchored signal. Each test then breaks (or
    /// swaps) exactly one field so a failure points at a single rule.
    fn sample_signal(kind: RiskSignalKind, file: &str, sym: &str) -> RiskSignal {
        RiskSignal {
            kind,
            anchor: SignalAnchor::symbol(file, sym),
            reason: "structural divergence from sibling implementations".into(),
            producer: ProducerId::new("pattern_deviation", 1),
            computed_at: 1_700_000_000,
            computed_against: None,
        }
    }

    #[test]
    fn empty_reason_is_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = String::new();
        assert!(matches!(sig.validate(), Err(RiskSignalError::EmptyReason)));
    }

    #[test]
    fn over_long_reason_is_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = "x".repeat(MAX_REASON_LEN + 1);
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::ReasonTooLong { .. })
        ));
    }

    #[test]
    fn reason_at_limit_is_accepted() {
        // The cap is inclusive: exactly MAX_REASON_LEN bytes must pass.
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = "x".repeat(MAX_REASON_LEN);
        sig.validate().unwrap();
    }

    #[test]
    fn reason_limit_counts_bytes_not_chars() {
        // 'é' is two bytes in UTF-8, so this reason fits the cap when counted
        // in characters but exceeds it when counted in bytes.
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = "é".repeat(MAX_REASON_LEN / 2 + 1);
        assert!(sig.reason.chars().count() <= MAX_REASON_LEN);
        assert!(sig.reason.len() > MAX_REASON_LEN);
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::ReasonTooLong { len, max })
                if len == sig.reason.len() && max == MAX_REASON_LEN
        ));
    }

    #[test]
    fn minimum_anchor_validates() {
        // The smallest legal anchor is a bare file: no symbol, no range.
        let mut sig = sample_signal(RiskSignalKind::TestReachability, "src/lib.rs", "bar");
        sig.anchor = SignalAnchor::file("src/lib.rs");
        sig.validate().unwrap();
    }

    #[test]
    fn symbol_anchor_validates() {
        let sig = sample_signal(RiskSignalKind::TestReachability, "src/lib.rs", "bar");
        sig.validate().unwrap();
    }

    #[test]
    fn empty_anchor_file_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.anchor = SignalAnchor::file("");
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::EmptyAnchorFile)
        ));
    }

    #[test]
    fn inverted_line_range_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.anchor = SignalAnchor::symbol("src/lib.rs", "foo").with_line_range(12, 10);
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::InvalidLineRange(12, 10))
        ));
    }

    #[test]
    fn single_line_range_validates() {
        // start == end is a one-line range, not an inverted one.
        let anchor = SignalAnchor::symbol("src/lib.rs", "foo").with_line_range(10, 10);
        anchor.validate().unwrap();
    }

    #[test]
    fn anchor_canonical_is_stable() {
        let a = SignalAnchor::symbol("src/lib.rs", "foo").with_line_range(10, 12);
        let b = SignalAnchor::symbol("src/lib.rs", "foo").with_line_range(10, 12);
        assert_eq!(a.canonical(), b.canonical());
        assert_eq!(a.canonical(), "src/lib.rs:foo:10-12");
    }

    #[test]
    fn anchor_canonical_omits_absent_parts() {
        assert_eq!(SignalAnchor::file("src/lib.rs").canonical(), "src/lib.rs");
        assert_eq!(
            SignalAnchor::symbol("src/lib.rs", "foo").canonical(),
            "src/lib.rs:foo"
        );
        assert_eq!(
            SignalAnchor::file("src/lib.rs")
                .with_line_range(10, 12)
                .canonical(),
            "src/lib.rs:10-12"
        );
    }

    #[test]
    fn anchor_key_matches_canonical() {
        let sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        assert_eq!(sig.anchor_key(), sig.anchor.canonical());
        assert_eq!(sig.anchor_key(), "src/lib.rs:foo");
    }

    #[test]
    fn kind_names_are_snake_case() {
        assert_eq!(RiskSignalKind::Novelty.as_str(), "novelty");
        assert_eq!(
            RiskSignalKind::TestReachability.as_str(),
            "test_reachability"
        );
        assert_eq!(
            RiskSignalKind::PatternDeviation.as_str(),
            "pattern_deviation"
        );
        assert_eq!(
            RiskSignalKind::InvariantAdjacency.as_str(),
            "invariant_adjacency"
        );
        assert_eq!(
            RiskSignalKind::SelfFlaggedUncertainty.as_str(),
            "self_flagged_uncertainty"
        );
    }

    #[test]
    fn priority_order_matches_spec() {
        assert!(
            RiskSignalKind::InvariantAdjacency.priority_rank()
                < RiskSignalKind::SelfFlaggedUncertainty.priority_rank()
        );
        assert!(
            RiskSignalKind::SelfFlaggedUncertainty.priority_rank()
                < RiskSignalKind::PatternDeviation.priority_rank()
        );
        assert!(
            RiskSignalKind::PatternDeviation.priority_rank()
                < RiskSignalKind::Novelty.priority_rank()
        );
        assert!(
            RiskSignalKind::Novelty.priority_rank()
                < RiskSignalKind::TestReachability.priority_rank()
        );
    }

    #[test]
    fn blob_encode_decode_roundtrips() {
        let blob = RiskSignalBlob::new(vec![sample_signal(
            RiskSignalKind::Novelty,
            "src/lib.rs",
            "foo",
        )]);
        let bytes = blob.encode().unwrap();
        let decoded = RiskSignalBlob::decode(&bytes).unwrap();
        assert_eq!(blob, decoded);
    }

    #[test]
    fn future_version_is_rejected() {
        let blob = RiskSignalBlob {
            format_version: RiskSignalBlob::FORMAT_VERSION + 1,
            signals: vec![],
        };
        assert!(matches!(
            blob.validate(),
            Err(RiskSignalError::UnsupportedVersion(_))
        ));
    }

    #[test]
    fn empty_producer_module_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.producer.module = String::new();
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::EmptyProducerModule)
        ));
    }
}