sharpebench_protocol/lib.rs
1//! The language-agnostic agent ⇄ harness protocol.
2//!
3//! Agents are **external** — a container or HTTP endpoint, in any language — not
4//! Rust code. Each decision step the harness sends a [`MarketObservation`] (JSON)
5//! and the agent replies with a [`Decision`] (JSON). Keeping this surface tiny and
6//! stable is what lets any vendor compete (and is the whole adoption story).
7//!
8//! All observations are **point-in-time**: `close_history`, `fundamentals` and
9//! `news` only ever contain information available at or before `date`.
10#![forbid(unsafe_code)]
11
12use std::collections::BTreeMap;
13
14use serde::{Deserialize, Serialize};
15
16/// What the agent sees at one decision point.
17#[derive(Clone, Debug, Serialize, Deserialize)]
18pub struct MarketObservation {
19 /// ISO-8601 date of the decision point.
20 pub date: String,
21 pub cash: f64,
22 pub symbols: Vec<SymbolSnapshot>,
23 pub portfolio: Vec<PositionState>,
24}
25
26/// Point-in-time data for one instrument.
27#[derive(Clone, Debug, Serialize, Deserialize)]
28pub struct SymbolSnapshot {
29 pub symbol: String,
30 /// Trailing closes up to and including `date` (oldest first).
31 pub close_history: Vec<f64>,
32 /// Named fundamental fields (e.g. `pe`, `revenue_yoy`). Empty if unavailable.
33 #[serde(default)]
34 pub fundamentals: BTreeMap<String, f64>,
35 /// Headlines published on or before `date`.
36 #[serde(default)]
37 pub news: Vec<String>,
38}
39
40/// The agent's current holding in one instrument.
41#[derive(Clone, Debug, Serialize, Deserialize)]
42pub struct PositionState {
43 pub symbol: String,
44 pub shares: f64,
45 pub avg_price: f64,
46}
47
48/// What the agent returns.
49#[derive(Clone, Debug, Serialize, Deserialize)]
50pub struct Decision {
51 pub orders: Vec<Order>,
52 /// Free-text rationale, captured into the trajectory for auditability.
53 #[serde(default)]
54 pub reasoning: String,
55}
56
57/// A single per-instrument instruction.
58#[derive(Clone, Debug, Serialize, Deserialize)]
59pub struct Order {
60 pub symbol: String,
61 pub action: Action,
62 /// Target portfolio weight for this symbol in [0, 1] (signed for shorts).
63 pub target_weight: f64,
64 /// Stated conviction in [0, 1]; scored for calibration.
65 #[serde(default = "default_confidence")]
66 pub confidence: f64,
67 /// Optional one-line rationale for *this* order, captured into the run trace
68 /// (audit trail). Defaults to empty so existing agents need no change.
69 #[serde(default)]
70 pub rationale: String,
71}
72
73/// Discrete action label (sizing is carried by `target_weight`).
74#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
75#[serde(rename_all = "snake_case")]
76pub enum Action {
77 Buy,
78 Sell,
79 Hold,
80 Close,
81}
82
/// Serde fallback for `Order::confidence`: the value used when an agent's order
/// omits the field.
fn default_confidence() -> f64 {
    const FALLBACK_CONFIDENCE: f64 = 0.5;
    FALLBACK_CONFIDENCE
}
86
87/// One captured decision step of a single backtest run: the agent's *raw* output
88/// at one point-in-time observation. This is the persisted artifact — it holds the
89/// agent's [`Decision`] (orders, sizing, conviction, reasoning) tagged with the
90/// observation it was made against, and deliberately stores **no** returns, NAV, or
91/// any self-reported metric. The score is recomputed by replaying these decisions
92/// through the engine, never read from the agent's word.
93#[derive(Clone, Debug, Serialize, Deserialize)]
94pub struct DecisionStep {
95 /// 0-based step index within the run's window (`window.start + step` is the
96 /// dataset index the observation was drawn from).
97 pub step: usize,
98 /// Stable id of the point-in-time observation this decision answered — the
99 /// observation's ISO date. Lets a verifier confirm the decision lines up with
100 /// the frozen dataset's bar at the replayed step.
101 pub observation_id: String,
102 /// The agent's raw decision at this step (orders + reasoning).
103 pub decision: Decision,
104}
105
106/// One captured backtest run (a single window × seed): the ordered sequence of the
107/// agent's raw decision steps, plus the (window, seed) coordinates needed to replay
108/// it through the identical point-in-time engine path.
109#[derive(Clone, Debug, Serialize, Deserialize)]
110pub struct RunTrajectory {
111 /// Inclusive window start (dataset index of the first decision step).
112 pub window_start: usize,
113 /// Exclusive window end.
114 pub window_end: usize,
115 /// Execution seed the run was driven with (governs slippage noise on replay).
116 pub seed: u64,
117 /// The raw decisions, in step order.
118 pub steps: Vec<DecisionStep>,
119}
120
121/// An agent's full captured trajectory: every (window × seed) run's raw decisions.
122/// Serde-(de)serializable to JSON; this is the on-disk artifact a separate verifier
123/// ingests to recompute the score from raw decisions alone.
124#[derive(Clone, Debug, Serialize, Deserialize)]
125pub struct AgentTrajectory {
126 pub agent_id: String,
127 /// In-sample search budget the agent declared (mirrors `AgentSubmission`), so a
128 /// recomputed submission carries the same deflation footprint.
129 #[serde(default)]
130 pub in_sample_trials: u32,
131 /// One captured run per (window, seed), in the same order the harness produced
132 /// them (window-major: all seeds of window 0, then window 1, …).
133 pub runs: Vec<RunTrajectory>,
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// An observation and a decision both survive a JSON round-trip, and a legacy
    /// order payload that omits the optional fields picks up the serde defaults.
    #[test]
    fn observation_and_decision_roundtrip() {
        // --- MarketObservation ---
        let snapshot = SymbolSnapshot {
            symbol: "A".to_string(),
            close_history: vec![1.0, 2.0],
            fundamentals: Default::default(),
            news: vec!["headline".to_string()],
        };
        let holding = PositionState {
            symbol: "A".to_string(),
            shares: 1.0,
            avg_price: 2.0,
        };
        let obs = MarketObservation {
            date: "2025-01-01".to_string(),
            cash: 1.0,
            symbols: vec![snapshot],
            portfolio: vec![holding],
        };
        let obs_json = serde_json::to_string(&obs).unwrap();
        let back: MarketObservation = serde_json::from_str(&obs_json).unwrap();
        assert_eq!(back.symbols[0].symbol, "A");

        // --- Decision ---
        let order = Order {
            symbol: "A".to_string(),
            action: Action::Buy,
            target_weight: 0.5,
            confidence: 0.9,
            rationale: "trailing breakout".to_string(),
        };
        let d = Decision {
            orders: vec![order],
            reasoning: "r".to_string(),
        };
        let decision_json = serde_json::to_string(&d).unwrap();
        let db: Decision = serde_json::from_str(&decision_json).unwrap();
        assert_eq!(db.orders[0].action, Action::Buy);
        // The order-level rationale must come back intact after the JSON round-trip.
        assert_eq!(db.orders[0].rationale, "trailing breakout");

        // An agent predating `rationale` (and omitting `confidence`) still parses,
        // with both fields taking their defaults.
        let legacy = r#"{"orders":[{"symbol":"A","action":"buy","target_weight":0.5}]}"#;
        let parsed: Decision = serde_json::from_str(legacy).unwrap();
        let legacy_order = &parsed.orders[0];
        assert_eq!(legacy_order.rationale, "");
        assert!((legacy_order.confidence - 0.5).abs() < 1e-12);
    }

    /// A full agent trajectory keeps its identifying fields and nested decision data
    /// across a JSON round-trip.
    #[test]
    fn trajectory_roundtrips_through_json() {
        let order = Order {
            symbol: "A".to_string(),
            action: Action::Buy,
            target_weight: 0.25,
            confidence: 0.8,
            rationale: String::new(),
        };
        let step = DecisionStep {
            step: 0,
            observation_id: "2025-001".to_string(),
            decision: Decision {
                orders: vec![order],
                reasoning: "r".to_string(),
            },
        };
        let run = RunTrajectory {
            window_start: 20,
            window_end: 30,
            seed: 3,
            steps: vec![step],
        };
        let traj = AgentTrajectory {
            agent_id: "a".to_string(),
            in_sample_trials: 7,
            runs: vec![run],
        };

        let traj_json = serde_json::to_string(&traj).unwrap();
        let back: AgentTrajectory = serde_json::from_str(&traj_json).unwrap();

        assert_eq!(back.agent_id, "a");
        assert_eq!(back.in_sample_trials, 7);
        let first_run = &back.runs[0];
        assert_eq!(first_run.seed, 3);
        let first_step = &first_run.steps[0];
        assert_eq!(first_step.observation_id, "2025-001");
        assert_eq!(first_step.decision.orders[0].target_weight, 0.25);
    }
}
216}