Skip to main content

difflore_core/observability/
trajectory.rs

//! Review trajectory builder.
//!
//! A trajectory is the ordered decision trail for a single review run:
//! which chunks were retrieved, which rules were applied, which LLM calls
//! fired, which past verdicts were recalled, what the self-check kept vs
//! dropped, and which issues were finally emitted.
//!
//! The builder is deliberately additive and optional: the review pipeline
//! threads an `Option<&mut TrajectoryBuilder>` through its hot path, so callers
//! that do not need trajectory data pass `None`.
//!
//! The JSON shape produced by `into_json()` must stay compatible with the
//! TypeScript discriminated union in
//! `difflore-cloud/src/types/trajectory.ts`. When that shape changes,
//! BOTH sides must be updated in lockstep — the `saveTrajectory` oRPC
//! endpoint validates the payload with the matching Zod schema on
//! ingress, so any drift fails the round-trip test.
18
19use serde::{Deserialize, Serialize};
20
21/// Where a `rules_applied` step's rules came from. Matches the TS
22/// `TrajectoryRuleSource` literal set exactly.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
24#[serde(rename_all = "snake_case")]
25pub enum RuleSource {
26    Local,
27    Team,
28    Global,
29}
30
31/// One past verdict recalled from the review-memory store, surfaced on
32/// the cloud detail page so reviewers can see **which** prior decisions
33/// influenced the current run. Shape: `{ id, title, similarity, excerpt }`.
34/// The `excerpt` field is
35/// truncated by callers to ~200 characters (with a trailing `…`) so the
36/// trajectory payload stays compact.
37#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
38pub struct RecalledVerdict {
39    pub id: String,
40    pub title: String,
41    pub similarity: f32,
42    pub excerpt: String,
43}
44
45/// Ordered discriminated step. Serialized with `tag = "kind"` so the JSON
46/// shape matches the TS union; every new variant must add a matching
47/// zod arm in `difflore-cloud/src/types/trajectory.ts`.
48#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
49#[serde(tag = "kind", rename_all = "snake_case")]
50pub enum TrajectoryStep {
51    /// Retrieval pass produced N chunks for the prompt context.
52    ChunksRetrieved {
53        count: usize,
54        symbols: Vec<String>,
55        similarity_scores: Vec<f32>,
56    },
57    /// Rule resolution picked the given rule IDs from `source`.
58    RulesApplied {
59        rule_ids: Vec<String>,
60        source: RuleSource,
61    },
62    /// One LLM invocation — perspective + token usage. `raw_output` is
63    /// optional so callers can choose to omit it for cost/privacy.
64    LlmCall {
65        perspective: String,
66        input_tokens: u32,
67        output_tokens: u32,
68        #[serde(skip_serializing_if = "Option::is_none", default)]
69        raw_output: Option<String>,
70    },
71    /// Review Memory recall fetched `count` past verdicts with the given
72    /// top-k similarity scores. `recalled_items` carries the per-verdict
73    /// payload the cloud detail page renders (id/title/similarity/excerpt);
74    /// it is `#[serde(default)]` so older trajectories that only carry
75    /// `count` + `top_similarities` still round-trip cleanly.
76    PastVerdictsRecalled {
77        count: usize,
78        top_similarities: Vec<f32>,
79        #[serde(default)]
80        recalled_items: Vec<RecalledVerdict>,
81    },
82    /// Self-check (`verify_pass`) kept N issues, dropped M, and produced
83    /// an average confidence score across the kept set.
84    SelfCheck {
85        keep_count: u32,
86        drop_count: u32,
87        avg_confidence: f32,
88    },
89    /// Signature-based confidence adjustment applied after self-check.
90    /// Records per-issue adjustments so the cloud detail page can show
91    /// which past verdicts influenced confidence scoring.
92    SignatureConfidenceAdjust {
93        /// Number of issues that received a positive bump (accepted match).
94        accepted_bumps: u32,
95        /// Number of issues that received a negative bump (rejected match).
96        rejected_bumps: u32,
97    },
98    /// Final decision: the issue IDs emitted to the user.
99    FinalDecision { issue_ids_emitted: Vec<String> },
100    /// MCP tool responded with `total_tokens` worth of payload, of which
101    /// `rules_injected` rules were included. Lets the cloud dashboard chart
102    /// MCP response sizes over time so we can spot token bloat early.
103    /// Token count is a coarse estimate (`byte_len` / 4).
104    McpResponseSize {
105        tool: String,
106        total_tokens: usize,
107        rules_injected: usize,
108    },
109    /// Breakdown of which origins the hit rules came from. Aggregated
110    /// across one MCP response so downstream analytics can answer
111    /// "how much value are conversation captures vs extracted rules
112    /// actually driving in recall".
113    RuleHitByOrigin {
114        manual: u32,
115        conversation: u32,
116        pr_review: u32,
117        extracted: u32,
118        cloud: u32,
119    },
120    /// How many candidate chunks the metadata pre-filter kept. `before` is the
121    /// count pre-filter; `after` is the count the embedding / FTS path scored.
122    RetrievalFilter { before: u32, after: u32 },
123    /// RRF fusion of the FTS and embedding candidate sets. `fts_hits` /
124    /// `emb_hits` record raw pre-fusion sizes; `overlap` records how many chunk
125    /// ids appeared in both.
126    HybridFusion {
127        fts_hits: u32,
128        emb_hits: u32,
129        overlap: u32,
130    },
131    /// HNSW ANN recall stats for a single retrieval call.
132    /// `used = true` means the ANN path produced candidates that fed
133    /// the RRF fusion; `used = false` means we fell back to the linear
134    /// cosine scan (empty index, dim mismatch, or any internal error).
135    /// `index_size` is the live (non-tombstoned) chunk count known to
136    /// the ANN graph at call time; `candidates` is how many top-k
137    /// results came back from `ann.search` before RRF de-duped and
138    /// re-ranked them.
139    AnnRecall {
140        used: bool,
141        index_size: u32,
142        candidates: u32,
143    },
144}
145
146/// Ordered collector for `TrajectoryStep`s. Threaded through the review
147/// pipeline as `Option<&mut TrajectoryBuilder>` so absence is a no-op.
148///
149/// Construction is `Default::default()`; callers push steps in the order
150/// they happen and finish with `into_json()` to hand the serialized
151/// payload off to the cloud `saveTrajectory` endpoint.
152#[derive(Debug, Clone, Default)]
153pub struct TrajectoryBuilder {
154    steps: Vec<TrajectoryStep>,
155}
156
157impl TrajectoryBuilder {
158    /// Start a fresh builder.
159    pub fn new() -> Self {
160        Self::default()
161    }
162
163    /// Append a single step. Preserves insertion order; callers control
164    /// the ordering so the resulting trajectory reads as a timeline.
165    pub fn push(&mut self, step: TrajectoryStep) {
166        self.steps.push(step);
167    }
168
169    /// Number of steps collected so far. Useful for tests + the final
170    /// decision step which wants to know "did anything at all happen".
171    pub const fn len(&self) -> usize {
172        self.steps.len()
173    }
174
175    /// Convenience: true when no steps have been pushed.
176    pub const fn is_empty(&self) -> bool {
177        self.steps.is_empty()
178    }
179
180    /// Borrow the steps collected so far — used by tests so they can
181    /// introspect without consuming the builder.
182    pub fn steps(&self) -> &[TrajectoryStep] {
183        &self.steps
184    }
185
186    /// Consume the builder and serialize to `serde_json::Value`. The
187    /// returned value is an array (`Value::Array`) of step objects, one
188    /// per `push`. Matches the TS side's `TrajectoryStep[]` exactly.
189    pub fn into_json(self) -> serde_json::Value {
190        serde_json::to_value(self.steps).unwrap_or(serde_json::Value::Array(vec![]))
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::Value;

    /// Build a trajectory by pushing `steps` in the given order.
    fn trajectory_of(steps: Vec<TrajectoryStep>) -> TrajectoryBuilder {
        let mut builder = TrajectoryBuilder::new();
        for step in steps {
            builder.push(step);
        }
        builder
    }

    /// The `kind` tag each variant is expected to carry on the wire. The
    /// match is exhaustive on purpose: adding a variant forces an update here.
    fn wire_kind(step: &TrajectoryStep) -> &'static str {
        match step {
            TrajectoryStep::ChunksRetrieved { .. } => "chunks_retrieved",
            TrajectoryStep::RulesApplied { .. } => "rules_applied",
            TrajectoryStep::PastVerdictsRecalled { .. } => "past_verdicts_recalled",
            TrajectoryStep::LlmCall { .. } => "llm_call",
            TrajectoryStep::SelfCheck { .. } => "self_check",
            TrajectoryStep::SignatureConfidenceAdjust { .. } => "signature_confidence_adjust",
            TrajectoryStep::FinalDecision { .. } => "final_decision",
            TrajectoryStep::McpResponseSize { .. } => "mcp_response_size",
            TrajectoryStep::RuleHitByOrigin { .. } => "rule_hit_by_origin",
            TrajectoryStep::RetrievalFilter { .. } => "retrieval_filter",
            TrajectoryStep::HybridFusion { .. } => "hybrid_fusion",
            TrajectoryStep::AnnRecall { .. } => "ann_recall",
        }
    }

    #[test]
    fn serialize_matches_ts_shape_exactly() {
        // The TS union expects `kind: "chunks_retrieved"` plus snake_case
        // field names; pin those here so field drift is caught early.
        let json = trajectory_of(vec![
            TrajectoryStep::ChunksRetrieved {
                count: 2,
                symbols: vec!["foo".into()],
                similarity_scores: vec![0.91],
            },
            TrajectoryStep::SelfCheck {
                keep_count: 3,
                drop_count: 1,
                avg_confidence: 0.82,
            },
        ])
        .into_json();

        let steps = json.as_array().expect("top-level must be array");
        assert_eq!(steps.len(), 2);

        let retrieved = steps[0].as_object().unwrap();
        assert_eq!(
            retrieved.get("kind").and_then(Value::as_str),
            Some("chunks_retrieved")
        );
        assert_eq!(retrieved.get("count").and_then(Value::as_u64), Some(2));
        for key in ["symbols", "similarity_scores"].iter() {
            assert!(retrieved.contains_key(*key));
        }

        let self_check = steps[1].as_object().unwrap();
        assert_eq!(
            self_check.get("kind").and_then(Value::as_str),
            Some("self_check")
        );
        assert_eq!(
            self_check.get("keep_count").and_then(Value::as_u64),
            Some(3)
        );
        assert_eq!(
            self_check.get("drop_count").and_then(Value::as_u64),
            Some(1)
        );
        // The f32 is widened to f64 on serialization, so compare with a
        // tolerance rather than exactly.
        let avg_confidence = self_check
            .get("avg_confidence")
            .and_then(Value::as_f64)
            .unwrap();
        assert!((avg_confidence - 0.82).abs() < 1e-6);
    }

    #[test]
    fn llm_call_omits_raw_output_when_absent() {
        // `raw_output` carries `skip_serializing_if = "Option::is_none"`, so
        // a `None` must not produce the key at all. That keeps the wire
        // shape minimal for cost-sensitive deployments.
        let json = trajectory_of(vec![TrajectoryStep::LlmCall {
            perspective: "safety".into(),
            input_tokens: 123,
            output_tokens: 45,
            raw_output: None,
        }])
        .into_json();

        let call = json.as_array().unwrap()[0].as_object().unwrap();
        assert_eq!(
            call.get("perspective").and_then(Value::as_str),
            Some("safety")
        );
        assert!(call.get("raw_output").is_none());
    }

    #[test]
    fn full_pipeline_shape_matches_plan_capture_points() {
        // Mirrors the sequence a successful run emits: chunks, rules, past
        // verdicts, one LLM call per perspective, self_check, and finally
        // final_decision. The ordering is part of the wire contract.
        let perspectives = ["safety", "performance", "style", "docs", "api_design"];

        let mut steps = vec![
            TrajectoryStep::ChunksRetrieved {
                count: 4,
                symbols: vec!["foo".into()],
                similarity_scores: vec![],
            },
            TrajectoryStep::RulesApplied {
                rule_ids: vec!["r1".into(), "r2".into()],
                source: RuleSource::Team,
            },
            TrajectoryStep::PastVerdictsRecalled {
                count: 2,
                top_similarities: vec![],
                recalled_items: vec![],
            },
        ];
        steps.extend(
            perspectives
                .iter()
                .map(|perspective| TrajectoryStep::LlmCall {
                    perspective: (*perspective).to_owned(),
                    input_tokens: 200,
                    output_tokens: 0,
                    raw_output: None,
                }),
        );
        steps.push(TrajectoryStep::SelfCheck {
            keep_count: 3,
            drop_count: 1,
            avg_confidence: 0.87,
        });
        steps.push(TrajectoryStep::FinalDecision {
            issue_ids_emitted: vec!["issue-1".into(), "issue-2".into(), "issue-3".into()],
        });
        let builder = trajectory_of(steps);

        // chunks + rules + past verdicts + five LLM calls + self_check + final.
        assert_eq!(builder.len(), 10);

        // The kinds must come out in exactly this order. A reorder is a
        // breaking change to the wire format and should fail here first.
        let kinds: Vec<&str> = builder.steps().iter().map(wire_kind).collect();
        assert_eq!(
            kinds,
            [
                "chunks_retrieved",
                "rules_applied",
                "past_verdicts_recalled",
                "llm_call",
                "llm_call",
                "llm_call",
                "llm_call",
                "llm_call",
                "self_check",
                "final_decision",
            ]
        );
    }

    #[test]
    fn mcp_response_size_and_rule_hit_by_origin_serialize() {
        // Pin the wire shape so telemetry consumers can depend on stable
        // field names.
        let builder = trajectory_of(vec![
            TrajectoryStep::McpResponseSize {
                tool: "search_rules".into(),
                total_tokens: 1234,
                rules_injected: 3,
            },
            TrajectoryStep::RuleHitByOrigin {
                manual: 1,
                conversation: 2,
                pr_review: 0,
                extracted: 1,
                cloud: 0,
            },
        ]);
        let expected = builder.steps().to_vec();

        let json = builder.into_json();
        let steps = json.as_array().expect("top-level array");

        let response_size = &steps[0];
        assert_eq!(response_size["kind"], "mcp_response_size");
        assert_eq!(response_size["tool"], "search_rules");
        assert_eq!(response_size["total_tokens"], 1234);
        assert_eq!(response_size["rules_injected"], 3);

        let by_origin = &steps[1];
        assert_eq!(by_origin["kind"], "rule_hit_by_origin");
        let origin_counts = [
            ("manual", 1),
            ("conversation", 2),
            ("pr_review", 0),
            ("extracted", 1),
            ("cloud", 0),
        ];
        for (origin, hits) in origin_counts.iter() {
            assert_eq!(by_origin[*origin], *hits);
        }

        // Parse the serialized text back into the enum so field drift
        // fails here.
        let wire = serde_json::to_string(&json).unwrap();
        let decoded: Vec<TrajectoryStep> = serde_json::from_str(&wire).unwrap();
        assert_eq!(decoded, expected);
    }

    #[test]
    fn round_trip_deserialize_via_serde_json() {
        // The serialized text must decode back into the very same enum
        // variants, which locks the `tag = "kind"` contract.
        let builder = trajectory_of(vec![
            TrajectoryStep::PastVerdictsRecalled {
                count: 4,
                top_similarities: vec![0.95, 0.88, 0.80, 0.72],
                recalled_items: vec![RecalledVerdict {
                    id: "verdict-1".into(),
                    title: "avoid unwrap in request handlers".into(),
                    similarity: 0.95,
                    excerpt: "fn handler() { ... .unwrap() ... }".into(),
                }],
            },
            TrajectoryStep::RulesApplied {
                rule_ids: vec!["r1".into(), "r2".into()],
                source: RuleSource::Global,
            },
            TrajectoryStep::FinalDecision {
                issue_ids_emitted: vec!["issue-1".into()],
            },
        ]);
        let expected = builder.steps().to_vec();

        let json = builder.into_json();
        let wire = serde_json::to_string(&json).unwrap();
        let decoded: Vec<TrajectoryStep> = serde_json::from_str(&wire).unwrap();
        assert_eq!(decoded, expected);
    }
}