difflore_core/observability/trajectory.rs
1//! Review trajectory builder.
2//!
3//! A trajectory is the ordered decision trail for a single review run:
4//! what chunks were retrieved, what rules were applied, which LLM calls
5//! fired, what past verdicts were recalled, what the self-check kept vs
6//! dropped, and which issues were finally emitted.
7//!
8//! The builder is deliberately additive and optional: the review pipeline
9//! threads an `Option<&mut TrajectoryBuilder>` through its hot path, so callers
10//! that do not need trajectory data pass `None`.
11//!
12//! The JSON shape produced by `into_json()` is byte-compatible with the
13//! TypeScript discriminated union in
14//! `difflore-cloud/src/types/trajectory.ts`. When that shape changes,
15//! BOTH sides must be updated in lockstep — the `saveTrajectory` oRPC
16//! endpoint validates the payload with the matching Zod schema on
17//! ingress, so any drift fails the round-trip test.
18
19use serde::{Deserialize, Serialize};
20
21/// Where a `rules_applied` step's rules came from. Matches the TS
22/// `TrajectoryRuleSource` literal set exactly.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
24#[serde(rename_all = "snake_case")]
25pub enum RuleSource {
26 Local,
27 Team,
28 Global,
29}
30
31/// One past verdict recalled from the review-memory store, surfaced on
32/// the cloud detail page so reviewers can see **which** prior decisions
33/// influenced the current run. Shape: `{ id, title, similarity, excerpt }`.
34/// The `excerpt` field is
35/// truncated by callers to ~200 characters (with a trailing `…`) so the
36/// trajectory payload stays compact.
37#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
38pub struct RecalledVerdict {
39 pub id: String,
40 pub title: String,
41 pub similarity: f32,
42 pub excerpt: String,
43}
44
45/// Ordered discriminated step. Serialized with `tag = "kind"` so the JSON
46/// shape matches the TS union; every new variant must add a matching
47/// zod arm in `difflore-cloud/src/types/trajectory.ts`.
48#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
49#[serde(tag = "kind", rename_all = "snake_case")]
50pub enum TrajectoryStep {
51 /// Retrieval pass produced N chunks for the prompt context.
52 ChunksRetrieved {
53 count: usize,
54 symbols: Vec<String>,
55 similarity_scores: Vec<f32>,
56 },
57 /// Rule resolution picked the given rule IDs from `source`.
58 RulesApplied {
59 rule_ids: Vec<String>,
60 source: RuleSource,
61 },
62 /// One LLM invocation — perspective + token usage. `raw_output` is
63 /// optional so callers can choose to omit it for cost/privacy.
64 LlmCall {
65 perspective: String,
66 input_tokens: u32,
67 output_tokens: u32,
68 #[serde(skip_serializing_if = "Option::is_none", default)]
69 raw_output: Option<String>,
70 },
71 /// Review Memory recall fetched `count` past verdicts with the given
72 /// top-k similarity scores. `recalled_items` carries the per-verdict
73 /// payload the cloud detail page renders (id/title/similarity/excerpt);
74 /// it is `#[serde(default)]` so older trajectories that only carry
75 /// `count` + `top_similarities` still round-trip cleanly.
76 PastVerdictsRecalled {
77 count: usize,
78 top_similarities: Vec<f32>,
79 #[serde(default)]
80 recalled_items: Vec<RecalledVerdict>,
81 },
82 /// Self-check (`verify_pass`) kept N issues, dropped M, and produced
83 /// an average confidence score across the kept set.
84 SelfCheck {
85 keep_count: u32,
86 drop_count: u32,
87 avg_confidence: f32,
88 },
89 /// Signature-based confidence adjustment applied after self-check.
90 /// Records per-issue adjustments so the cloud detail page can show
91 /// which past verdicts influenced confidence scoring.
92 SignatureConfidenceAdjust {
93 /// Number of issues that received a positive bump (accepted match).
94 accepted_bumps: u32,
95 /// Number of issues that received a negative bump (rejected match).
96 rejected_bumps: u32,
97 },
98 /// Final decision: the issue IDs emitted to the user.
99 FinalDecision { issue_ids_emitted: Vec<String> },
100 /// MCP tool responded with `total_tokens` worth of payload, of which
101 /// `rules_injected` rules were included. Lets the cloud dashboard chart
102 /// MCP response sizes over time so we can spot token bloat early.
103 /// Token count is a coarse estimate (`byte_len` / 4).
104 McpResponseSize {
105 tool: String,
106 total_tokens: usize,
107 rules_injected: usize,
108 },
109 /// Breakdown of which origins the hit rules came from. Aggregated
110 /// across one MCP response so downstream analytics can answer
111 /// "how much value are conversation captures vs extracted rules
112 /// actually driving in recall".
113 RuleHitByOrigin {
114 manual: u32,
115 conversation: u32,
116 pr_review: u32,
117 extracted: u32,
118 cloud: u32,
119 },
120 /// How many candidate chunks the metadata pre-filter kept. `before` is the
121 /// count pre-filter; `after` is the count the embedding / FTS path scored.
122 RetrievalFilter { before: u32, after: u32 },
123 /// RRF fusion of the FTS and embedding candidate sets. `fts_hits` /
124 /// `emb_hits` record raw pre-fusion sizes; `overlap` records how many chunk
125 /// ids appeared in both.
126 HybridFusion {
127 fts_hits: u32,
128 emb_hits: u32,
129 overlap: u32,
130 },
131 /// HNSW ANN recall stats for a single retrieval call.
132 /// `used = true` means the ANN path produced candidates that fed
133 /// the RRF fusion; `used = false` means we fell back to the linear
134 /// cosine scan (empty index, dim mismatch, or any internal error).
135 /// `index_size` is the live (non-tombstoned) chunk count known to
136 /// the ANN graph at call time; `candidates` is how many top-k
137 /// results came back from `ann.search` before RRF de-duped and
138 /// re-ranked them.
139 AnnRecall {
140 used: bool,
141 index_size: u32,
142 candidates: u32,
143 },
144}
145
146/// Ordered collector for `TrajectoryStep`s. Threaded through the review
147/// pipeline as `Option<&mut TrajectoryBuilder>` so absence is a no-op.
148///
149/// Construction is `Default::default()`; callers push steps in the order
150/// they happen and finish with `into_json()` to hand the serialized
151/// payload off to the cloud `saveTrajectory` endpoint.
152#[derive(Debug, Clone, Default)]
153pub struct TrajectoryBuilder {
154 steps: Vec<TrajectoryStep>,
155}
156
157impl TrajectoryBuilder {
158 /// Start a fresh builder.
159 pub fn new() -> Self {
160 Self::default()
161 }
162
163 /// Append a single step. Preserves insertion order; callers control
164 /// the ordering so the resulting trajectory reads as a timeline.
165 pub fn push(&mut self, step: TrajectoryStep) {
166 self.steps.push(step);
167 }
168
169 /// Number of steps collected so far. Useful for tests + the final
170 /// decision step which wants to know "did anything at all happen".
171 pub const fn len(&self) -> usize {
172 self.steps.len()
173 }
174
175 /// Convenience: true when no steps have been pushed.
176 pub const fn is_empty(&self) -> bool {
177 self.steps.is_empty()
178 }
179
180 /// Borrow the steps collected so far — used by tests so they can
181 /// introspect without consuming the builder.
182 pub fn steps(&self) -> &[TrajectoryStep] {
183 &self.steps
184 }
185
186 /// Consume the builder and serialize to `serde_json::Value`. The
187 /// returned value is an array (`Value::Array`) of step objects, one
188 /// per `push`. Matches the TS side's `TrajectoryStep[]` exactly.
189 pub fn into_json(self) -> serde_json::Value {
190 serde_json::to_value(self.steps).unwrap_or(serde_json::Value::Array(vec![]))
191 }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Two representative steps serialize to the snake_case, `kind`-tagged
    /// objects the TS union expects.
    #[test]
    fn serialize_matches_ts_shape_exactly() {
        // The TS union uses `kind: "chunks_retrieved"` with snake_case
        // field names. Pin the keys and values so field drift is caught here.
        let mut b = TrajectoryBuilder::new();
        b.push(TrajectoryStep::ChunksRetrieved {
            count: 2,
            symbols: vec!["foo".into()],
            similarity_scores: vec![0.91],
        });
        b.push(TrajectoryStep::SelfCheck {
            keep_count: 3,
            drop_count: 1,
            avg_confidence: 0.82,
        });
        let value = b.into_json();
        let arr = value.as_array().expect("top-level must be array");
        assert_eq!(arr.len(), 2);

        let first = arr[0].as_object().unwrap();
        assert_eq!(
            first.get("kind").and_then(serde_json::Value::as_str),
            Some("chunks_retrieved")
        );
        assert_eq!(
            first.get("count").and_then(serde_json::Value::as_u64),
            Some(2)
        );
        // Check the payload, not just that the key exists.
        assert_eq!(first.get("symbols"), Some(&serde_json::json!(["foo"])));
        assert!(first.contains_key("similarity_scores"));

        let second = arr[1].as_object().unwrap();
        assert_eq!(
            second.get("kind").and_then(serde_json::Value::as_str),
            Some("self_check")
        );
        assert_eq!(
            second.get("keep_count").and_then(serde_json::Value::as_u64),
            Some(3)
        );
        assert_eq!(
            second.get("drop_count").and_then(serde_json::Value::as_u64),
            Some(1)
        );
        // `avg_confidence` is an f32 widened to f64 on the way out, so
        // compare with a tolerance instead of exact equality.
        assert!(
            (second
                .get("avg_confidence")
                .and_then(serde_json::Value::as_f64)
                .unwrap()
                - 0.82)
                .abs()
                < 1e-6
        );
    }

    /// An `LlmCall` with no raw output must not emit the `raw_output` key.
    #[test]
    fn llm_call_omits_raw_output_when_absent() {
        // `raw_output` is `#[serde(skip_serializing_if = "Option::is_none")]`
        // so absent output should not emit the key at all. This keeps the
        // on-the-wire shape minimal for cost-sensitive deployments.
        let mut b = TrajectoryBuilder::new();
        b.push(TrajectoryStep::LlmCall {
            perspective: "safety".into(),
            input_tokens: 123,
            output_tokens: 45,
            raw_output: None,
        });
        let value = b.into_json();
        let obj = value.as_array().unwrap()[0].as_object().unwrap();
        assert_eq!(
            obj.get("perspective").and_then(serde_json::Value::as_str),
            Some("safety")
        );
        assert!(!obj.contains_key("raw_output"));
    }

    /// A full successful run keeps its step order, and the `kind` tags on the
    /// wire match the names this test expects for each variant.
    #[test]
    fn full_pipeline_shape_matches_plan_capture_points() {
        // Simulates the sequence emitted on a successful run: chunks, rules,
        // past verdicts, one LLM call per perspective, self_check, then
        // final_decision. Ordering is part of the wire contract.
        let mut b = TrajectoryBuilder::new();
        b.push(TrajectoryStep::ChunksRetrieved {
            count: 4,
            symbols: vec!["foo".into()],
            similarity_scores: vec![],
        });
        b.push(TrajectoryStep::RulesApplied {
            rule_ids: vec!["r1".into(), "r2".into()],
            source: RuleSource::Team,
        });
        b.push(TrajectoryStep::PastVerdictsRecalled {
            count: 2,
            top_similarities: vec![],
            recalled_items: vec![],
        });
        for p in ["safety", "performance", "style", "docs", "api_design"] {
            b.push(TrajectoryStep::LlmCall {
                perspective: p.to_owned(),
                input_tokens: 200,
                output_tokens: 0,
                raw_output: None,
            });
        }
        b.push(TrajectoryStep::SelfCheck {
            keep_count: 3,
            drop_count: 1,
            avg_confidence: 0.87,
        });
        b.push(TrajectoryStep::FinalDecision {
            issue_ids_emitted: vec!["issue-1".into(), "issue-2".into(), "issue-3".into()],
        });

        assert_eq!(b.len(), 1 + 1 + 1 + 5 + 1 + 1);

        // Kind order must be exactly this. Any reorder is a breaking
        // change to the wire format and should fail here first. The match is
        // exhaustive on purpose: adding a variant forces an update here.
        let kinds: Vec<&str> = b
            .steps()
            .iter()
            .map(|s| match s {
                TrajectoryStep::ChunksRetrieved { .. } => "chunks_retrieved",
                TrajectoryStep::RulesApplied { .. } => "rules_applied",
                TrajectoryStep::PastVerdictsRecalled { .. } => "past_verdicts_recalled",
                TrajectoryStep::LlmCall { .. } => "llm_call",
                TrajectoryStep::SelfCheck { .. } => "self_check",
                TrajectoryStep::SignatureConfidenceAdjust { .. } => "signature_confidence_adjust",
                TrajectoryStep::FinalDecision { .. } => "final_decision",
                TrajectoryStep::McpResponseSize { .. } => "mcp_response_size",
                TrajectoryStep::RuleHitByOrigin { .. } => "rule_hit_by_origin",
                TrajectoryStep::RetrievalFilter { .. } => "retrieval_filter",
                TrajectoryStep::HybridFusion { .. } => "hybrid_fusion",
                TrajectoryStep::AnnRecall { .. } => "ann_recall",
            })
            .collect();
        assert_eq!(
            kinds,
            vec![
                "chunks_retrieved",
                "rules_applied",
                "past_verdicts_recalled",
                "llm_call",
                "llm_call",
                "llm_call",
                "llm_call",
                "llm_call",
                "self_check",
                "final_decision",
            ]
        );

        // The names above are hand-written, so on their own they would not
        // notice a change to the serde tag or `rename_all`. Cross-check them
        // against the `kind` values that are actually serialized.
        let wire = b.clone().into_json();
        let wire_kinds: Vec<&str> = wire
            .as_array()
            .expect("top-level must be array")
            .iter()
            .map(|step| {
                step["kind"]
                    .as_str()
                    .expect("every step carries a string `kind`")
            })
            .collect();
        assert_eq!(wire_kinds, kinds);
    }

    /// The MCP telemetry steps serialize with stable field names and survive
    /// a JSON round trip.
    #[test]
    fn mcp_response_size_and_rule_hit_by_origin_serialize() {
        // Lock the on-the-wire shape so telemetry consumers can rely on stable
        // field names.
        let mut b = TrajectoryBuilder::new();
        b.push(TrajectoryStep::McpResponseSize {
            tool: "search_rules".into(),
            total_tokens: 1234,
            rules_injected: 3,
        });
        b.push(TrajectoryStep::RuleHitByOrigin {
            manual: 1,
            conversation: 2,
            pr_review: 0,
            extracted: 1,
            cloud: 0,
        });

        let value = b.clone().into_json();
        let arr = value.as_array().expect("top-level array");
        assert_eq!(arr[0]["kind"], "mcp_response_size");
        assert_eq!(arr[0]["tool"], "search_rules");
        assert_eq!(arr[0]["total_tokens"], 1234);
        assert_eq!(arr[0]["rules_injected"], 3);
        assert_eq!(arr[1]["kind"], "rule_hit_by_origin");
        assert_eq!(arr[1]["manual"], 1);
        assert_eq!(arr[1]["conversation"], 2);
        assert_eq!(arr[1]["pr_review"], 0);
        assert_eq!(arr[1]["extracted"], 1);
        assert_eq!(arr[1]["cloud"], 0);

        // Round-trip back to the enum so field drift fails here.
        let text = serde_json::to_string(&value).unwrap();
        let parsed: Vec<TrajectoryStep> = serde_json::from_str(&text).unwrap();
        assert_eq!(parsed, b.steps().to_vec());
    }

    /// Serialized steps deserialize back into the same enum variants.
    #[test]
    fn round_trip_deserialize_via_serde_json() {
        // Ensures the on-the-wire bytes deserialize back into the same
        // enum variants — locks the `tag = "kind"` contract.
        let mut b = TrajectoryBuilder::new();
        b.push(TrajectoryStep::PastVerdictsRecalled {
            count: 4,
            top_similarities: vec![0.95, 0.88, 0.80, 0.72],
            recalled_items: vec![RecalledVerdict {
                id: "verdict-1".into(),
                title: "avoid unwrap in request handlers".into(),
                similarity: 0.95,
                excerpt: "fn handler() { ... .unwrap() ... }".into(),
            }],
        });
        b.push(TrajectoryStep::RulesApplied {
            rule_ids: vec!["r1".into(), "r2".into()],
            source: RuleSource::Global,
        });
        b.push(TrajectoryStep::FinalDecision {
            issue_ids_emitted: vec!["issue-1".into()],
        });

        let value = b.clone().into_json();
        let text = serde_json::to_string(&value).unwrap();
        let parsed: Vec<TrajectoryStep> = serde_json::from_str(&text).unwrap();
        assert_eq!(parsed, b.steps().to_vec());
    }
}
415}