Skip to main content

objects/object/
risk_signal.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Typed risk signals computed against a state and persisted alongside it.
3//!
4//! Computation is pure (`(prior_state, new_state, repo_config) -> Vec<RiskSignal>`)
5//! and lives in `crates/state_review/`. This module owns only the shape: what
6//! a signal is, how it serializes on disk, and the validation rules.
7//!
8//! The full set of fired signals is stored on the state. Tick budgeting (which
9//! signals to surface in the review UI) happens at render time and is never
10//! baked into storage — see [`state_review::budget`].
11//!
12//! Wire encoding: rmp-serde MessagePack. Format version is `1`. New optional
13//! fields are appended at the tail of [`RiskSignal`] with `#[serde(default)]`,
14//! matching the convention used elsewhere in the object model.
15
16use serde::{Deserialize, Serialize};
17
18use crate::object::hash::ChangeId;
19
/// Maximum length of [`RiskSignal::reason`], in bytes.
///
/// The limit is compared against `String::len`, so it counts UTF-8 bytes,
/// not characters: a reason containing multi-byte characters hits the cap
/// with fewer visible characters. The bound is inclusive — a reason of
/// exactly `MAX_REASON_LEN` bytes passes [`RiskSignal::validate`].
///
/// The reason is meant to be a single sentence, surfaced in tight gutter UI.
/// Keeping the cap at 200 forces producers to be specific and prevents the
/// review payload from ballooning when many signals fire.
pub const MAX_REASON_LEN: usize = 200;
26
/// Top-level encoded blob. Stored under a [`ContentHash`] referenced from
/// [`State::risk_signals`]. A blob with `format_version > FORMAT_VERSION` is
/// rejected; older versions are read with the missing-field defaults.
///
/// Build one with [`RiskSignalBlob::new`] so the version stamp is filled in;
/// the fields are public, so a hand-built value should be passed through
/// [`RiskSignalBlob::validate`] before it is trusted.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct RiskSignalBlob {
    /// Wire format version the blob was written with.
    /// [`RiskSignalBlob::new`] stamps [`RiskSignalBlob::FORMAT_VERSION`].
    pub format_version: u8,
    /// The signals carried by this blob. Per the module docs this is the
    /// full set of fired signals, not a budgeted subset.
    pub signals: Vec<RiskSignal>,
}
35
36impl RiskSignalBlob {
37    pub const FORMAT_VERSION: u8 = 1;
38
39    pub fn new(signals: Vec<RiskSignal>) -> Self {
40        Self {
41            format_version: Self::FORMAT_VERSION,
42            signals,
43        }
44    }
45
46    pub fn encode(&self) -> Result<Vec<u8>, RiskSignalError> {
47        rmp_serde::to_vec(self).map_err(|err| RiskSignalError::Encoding(err.to_string()))
48    }
49
50    pub fn decode(bytes: &[u8]) -> Result<Self, RiskSignalError> {
51        let blob: Self = rmp_serde::from_slice(bytes)
52            .map_err(|err| RiskSignalError::Encoding(err.to_string()))?;
53        blob.validate()?;
54        Ok(blob)
55    }
56
57    pub fn validate(&self) -> Result<(), RiskSignalError> {
58        if self.format_version != Self::FORMAT_VERSION {
59            return Err(RiskSignalError::UnsupportedVersion(self.format_version));
60        }
61        for signal in &self.signals {
62            signal.validate()?;
63        }
64        Ok(())
65    }
66}
67
/// A single fired risk signal: what kind it is, where it points, why it
/// fired, and which producer computed it and when.
///
/// The fields are public, so a value can be built without going through any
/// checks; [`RiskSignal::validate`] enforces the rules on `reason`, `anchor`
/// and `producer`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct RiskSignal {
    /// Why the signal fired.
    pub kind: RiskSignalKind,
    /// Where in the change the signal points.
    pub anchor: SignalAnchor,
    /// Human-readable explanation. Must be non-empty and at most
    /// [`MAX_REASON_LEN`] bytes to pass validation.
    pub reason: String,
    /// Producer module and version that emitted the signal.
    pub producer: ProducerId,
    /// Unix epoch seconds.
    pub computed_at: i64,
    /// Optional state this signal was computed against. Useful for retracing
    /// when a signal moves between renders (e.g., anchor travel after a
    /// rename).
    ///
    /// Defaults to `None` when absent from the encoded data, following the
    /// module's convention for optional fields appended at the tail.
    #[serde(default)]
    pub computed_against: Option<ChangeId>,
}
82
83impl RiskSignal {
84    pub fn validate(&self) -> Result<(), RiskSignalError> {
85        if self.reason.is_empty() {
86            return Err(RiskSignalError::EmptyReason);
87        }
88        if self.reason.len() > MAX_REASON_LEN {
89            return Err(RiskSignalError::ReasonTooLong {
90                len: self.reason.len(),
91                max: MAX_REASON_LEN,
92            });
93        }
94        self.anchor.validate()?;
95        self.producer.validate()?;
96        Ok(())
97    }
98
99    /// Stable canonical anchor string used to group signals on the same anchor
100    /// during render-time budgeting. The format is intentionally simple so
101    /// budgeting comparisons are cheap and order-independent.
102    pub fn anchor_key(&self) -> String {
103        self.anchor.canonical()
104    }
105}
106
/// Why a signal fired. Variants are wire-stable; new variants are appended.
///
/// Variant names are serialized in `snake_case` (see the serde attribute
/// below); [`RiskSignalKind::as_str`] returns the same spellings.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RiskSignalKind {
    /// New control-flow shape that doesn't appear elsewhere in the repo.
    Novelty,
    /// No test in the repo statically reaches the changed symbol.
    /// Reasoning text *must* clarify this is static reachability via
    /// tree-sitter, not runtime coverage.
    TestReachability,
    /// New code structurally diverges from local exemplars (sibling
    /// functions or the prior version of the same symbol).
    PatternDeviation,
    /// An invariant or `enforces`-tagged annotation lives on the changed
    /// symbol.
    InvariantAdjacency,
    /// Agent flagged uncertainty about its own output. Passthrough from
    /// the captured state's provenance.
    SelfFlaggedUncertainty,
}
127
128impl RiskSignalKind {
129    pub fn as_str(&self) -> &'static str {
130        match self {
131            Self::Novelty => "novelty",
132            Self::TestReachability => "test_reachability",
133            Self::PatternDeviation => "pattern_deviation",
134            Self::InvariantAdjacency => "invariant_adjacency",
135            Self::SelfFlaggedUncertainty => "self_flagged_uncertainty",
136        }
137    }
138
139    /// Render-time priority. Lower numbers surface first when budgeting.
140    /// See `state_review::budget` for the full algorithm.
141    ///
142    /// The order is load-bearing: changing it changes which signals reviewers
143    /// see first when many fire on the same state. If you bump these numbers,
144    /// update the budgeting test goldens too.
145    pub fn priority_rank(&self) -> u8 {
146        match self {
147            Self::InvariantAdjacency => 0,
148            Self::SelfFlaggedUncertainty => 1,
149            Self::PatternDeviation => 2,
150            Self::Novelty => 3,
151            Self::TestReachability => 4,
152        }
153    }
154}
155
/// Where in the change a signal fires. Symbol-level is preferred — symbols
/// are durable across renames; line ranges are computed at fire time and
/// drift as code is reformatted.
///
/// Only `file` is mandatory. Use [`SignalAnchor::file`] or
/// [`SignalAnchor::symbol`] to build one and
/// [`SignalAnchor::with_line_range`] to attach a range.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SignalAnchor {
    /// File the signal points at. Must be non-empty to pass validation.
    pub file: String,
    /// Symbol within `file`, if the signal is symbol-level. Defaults to
    /// `None` when absent from the encoded data.
    #[serde(default)]
    pub symbol: Option<String>,
    /// `(start, end)` line pair, if known. Validation requires
    /// `start <= end`. NOTE(review): whether the bounds are 1-based and
    /// inclusive is not established in this module — confirm with the
    /// producers. Defaults to `None` when absent from the encoded data.
    #[serde(default)]
    pub line_range: Option<(u32, u32)>,
}
167
168impl SignalAnchor {
169    pub fn file(file: impl Into<String>) -> Self {
170        Self {
171            file: file.into(),
172            symbol: None,
173            line_range: None,
174        }
175    }
176
177    pub fn symbol(file: impl Into<String>, symbol: impl Into<String>) -> Self {
178        Self {
179            file: file.into(),
180            symbol: Some(symbol.into()),
181            line_range: None,
182        }
183    }
184
185    pub fn with_line_range(mut self, start: u32, end: u32) -> Self {
186        self.line_range = Some((start, end));
187        self
188    }
189
190    pub fn validate(&self) -> Result<(), RiskSignalError> {
191        if self.file.is_empty() {
192            return Err(RiskSignalError::EmptyAnchorFile);
193        }
194        if let Some((start, end)) = self.line_range
195            && start > end
196        {
197            return Err(RiskSignalError::InvalidLineRange(start, end));
198        }
199        Ok(())
200    }
201
202    /// Stable canonical form `<file>[:symbol][:start-end]` for grouping.
203    pub fn canonical(&self) -> String {
204        let mut s = self.file.clone();
205        if let Some(symbol) = &self.symbol {
206            s.push(':');
207            s.push_str(symbol);
208        }
209        if let Some((start, end)) = self.line_range {
210            s.push(':');
211            s.push_str(&format!("{start}-{end}"));
212        }
213        s
214    }
215}
216
/// Identifies the producer that fired this signal. The `version` lets
/// budgeting and signal-health surfaces age out signals from old producer
/// versions without re-running computation — important when we tune a
/// producer's heuristics and want to compare apples to apples.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProducerId {
    /// Name of the producing module. Must be non-empty to pass
    /// [`ProducerId::validate`].
    pub module: String,
    /// Version of the producer. No constraint is placed on this value in
    /// this module.
    pub version: u32,
}
226
227impl ProducerId {
228    pub fn new(module: impl Into<String>, version: u32) -> Self {
229        Self {
230            module: module.into(),
231            version,
232        }
233    }
234
235    pub fn validate(&self) -> Result<(), RiskSignalError> {
236        if self.module.is_empty() {
237            return Err(RiskSignalError::EmptyProducerModule);
238        }
239        Ok(())
240    }
241}
242
/// Errors raised while validating, encoding, or decoding risk signals.
#[derive(Debug, thiserror::Error)]
pub enum RiskSignalError {
    /// The blob's `format_version` is not one this build accepts; carries
    /// the offending version.
    #[error("unsupported risk signal blob version {0}")]
    UnsupportedVersion(u8),
    /// [`RiskSignal::reason`] is the empty string.
    #[error("risk signal reason must not be empty")]
    EmptyReason,
    /// [`RiskSignal::reason`] exceeds the cap; `len` is the actual byte
    /// length and `max` is [`MAX_REASON_LEN`].
    #[error("risk signal reason too long ({len} bytes, max {max})")]
    ReasonTooLong { len: usize, max: usize },
    /// [`SignalAnchor::file`] is the empty string.
    #[error("risk signal anchor must reference a non-empty file")]
    EmptyAnchorFile,
    /// The anchor's line range has `start > end`; carries `(start, end)`.
    #[error("risk signal line range start {0} exceeds end {1}")]
    InvalidLineRange(u32, u32),
    /// [`ProducerId::module`] is the empty string.
    #[error("risk signal producer module must not be empty")]
    EmptyProducerModule,
    /// Serialization or deserialization failed; carries the underlying
    /// rmp-serde error rendered as a string.
    #[error("risk signal blob encoding error: {0}")]
    Encoding(String),
}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a valid symbol-anchored signal; tests mutate one field to
    /// exercise a single validation rule at a time.
    fn sample_signal(kind: RiskSignalKind, file: &str, sym: &str) -> RiskSignal {
        RiskSignal {
            kind,
            anchor: SignalAnchor::symbol(file, sym),
            reason: "structural divergence from sibling implementations".into(),
            producer: ProducerId::new("pattern_deviation", 1),
            computed_at: 1_700_000_000,
            computed_against: None,
        }
    }

    #[test]
    fn empty_reason_is_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = String::new();
        assert!(matches!(sig.validate(), Err(RiskSignalError::EmptyReason)));
    }

    #[test]
    fn over_long_reason_is_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = "x".repeat(MAX_REASON_LEN + 1);
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::ReasonTooLong { .. })
        ));
    }

    /// The cap is inclusive: exactly `MAX_REASON_LEN` bytes is still valid.
    #[test]
    fn reason_at_limit_is_accepted() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = "x".repeat(MAX_REASON_LEN);
        sig.validate().unwrap();
    }

    #[test]
    fn minimum_anchor_validates() {
        let sig = sample_signal(RiskSignalKind::TestReachability, "src/lib.rs", "bar");
        sig.validate().unwrap();
    }

    #[test]
    fn empty_anchor_file_is_rejected() {
        assert!(matches!(
            SignalAnchor::file("").validate(),
            Err(RiskSignalError::EmptyAnchorFile)
        ));
    }

    #[test]
    fn inverted_line_range_is_rejected() {
        let anchor = SignalAnchor::file("src/lib.rs").with_line_range(12, 10);
        assert!(matches!(
            anchor.validate(),
            Err(RiskSignalError::InvalidLineRange(12, 10))
        ));
    }

    #[test]
    fn single_line_range_validates() {
        SignalAnchor::file("src/lib.rs")
            .with_line_range(7, 7)
            .validate()
            .unwrap();
    }

    #[test]
    fn anchor_canonical_is_stable() {
        let a = SignalAnchor::symbol("src/lib.rs", "foo").with_line_range(10, 12);
        let b = SignalAnchor::symbol("src/lib.rs", "foo").with_line_range(10, 12);
        assert_eq!(a.canonical(), b.canonical());
        assert_eq!(a.canonical(), "src/lib.rs:foo:10-12");
    }

    #[test]
    fn anchor_canonical_omits_absent_parts() {
        assert_eq!(SignalAnchor::file("src/lib.rs").canonical(), "src/lib.rs");
        assert_eq!(
            SignalAnchor::symbol("src/lib.rs", "foo").canonical(),
            "src/lib.rs:foo"
        );
        assert_eq!(
            SignalAnchor::file("src/lib.rs")
                .with_line_range(3, 9)
                .canonical(),
            "src/lib.rs:3-9"
        );
    }

    #[test]
    fn priority_order_matches_spec() {
        assert!(
            RiskSignalKind::InvariantAdjacency.priority_rank()
                < RiskSignalKind::SelfFlaggedUncertainty.priority_rank()
        );
        assert!(
            RiskSignalKind::SelfFlaggedUncertainty.priority_rank()
                < RiskSignalKind::PatternDeviation.priority_rank()
        );
        assert!(
            RiskSignalKind::PatternDeviation.priority_rank()
                < RiskSignalKind::Novelty.priority_rank()
        );
        assert!(
            RiskSignalKind::Novelty.priority_rank()
                < RiskSignalKind::TestReachability.priority_rank()
        );
    }

    #[test]
    fn blob_encode_decode_roundtrips() {
        let blob = RiskSignalBlob::new(vec![sample_signal(
            RiskSignalKind::Novelty,
            "src/lib.rs",
            "foo",
        )]);
        let bytes = blob.encode().unwrap();
        let decoded = RiskSignalBlob::decode(&bytes).unwrap();
        assert_eq!(blob, decoded);
    }

    /// `decode` must run validation, not just deserialization.
    #[test]
    fn decode_rejects_invalid_signal() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.reason = String::new();
        // Serialize directly so this test depends only on `decode` validating.
        let bytes = rmp_serde::to_vec(&RiskSignalBlob::new(vec![sig])).unwrap();
        assert!(matches!(
            RiskSignalBlob::decode(&bytes),
            Err(RiskSignalError::EmptyReason)
        ));
    }

    #[test]
    fn future_version_is_rejected() {
        let blob = RiskSignalBlob {
            format_version: RiskSignalBlob::FORMAT_VERSION + 1,
            signals: vec![],
        };
        assert!(matches!(
            blob.validate(),
            Err(RiskSignalError::UnsupportedVersion(_))
        ));
    }

    #[test]
    fn empty_producer_module_rejected() {
        let mut sig = sample_signal(RiskSignalKind::Novelty, "src/lib.rs", "foo");
        sig.producer.module = String::new();
        assert!(matches!(
            sig.validate(),
            Err(RiskSignalError::EmptyProducerModule)
        ));
    }
}