Skip to main content

sharpebench_protocol/
lib.rs

//! The language-agnostic agent ⇄ harness protocol.
//!
//! Agents are **external** — a container or HTTP endpoint, in any language — not
//! Rust code. At each decision step, the harness sends a [`MarketObservation`]
//! (JSON) and the agent replies with a [`Decision`] (JSON). Keeping this surface
//! tiny and stable is what lets any vendor compete (and is the whole adoption
//! story).
//!
//! All observations are **point-in-time**: `close_history`, `fundamentals` and
//! `news` only ever contain information available at or before `date`.
10#![forbid(unsafe_code)]
11
12use std::collections::BTreeMap;
13
14use serde::{Deserialize, Serialize};
15
16/// What the agent sees at one decision point.
17#[derive(Clone, Debug, Serialize, Deserialize)]
18pub struct MarketObservation {
19    /// ISO-8601 date of the decision point.
20    pub date: String,
21    pub cash: f64,
22    pub symbols: Vec<SymbolSnapshot>,
23    pub portfolio: Vec<PositionState>,
24}
25
26/// Point-in-time data for one instrument.
27#[derive(Clone, Debug, Serialize, Deserialize)]
28pub struct SymbolSnapshot {
29    pub symbol: String,
30    /// Trailing closes up to and including `date` (oldest first).
31    pub close_history: Vec<f64>,
32    /// Named fundamental fields (e.g. `pe`, `revenue_yoy`). Empty if unavailable.
33    #[serde(default)]
34    pub fundamentals: BTreeMap<String, f64>,
35    /// Headlines published on or before `date`.
36    #[serde(default)]
37    pub news: Vec<String>,
38}
39
40/// The agent's current holding in one instrument.
41#[derive(Clone, Debug, Serialize, Deserialize)]
42pub struct PositionState {
43    pub symbol: String,
44    pub shares: f64,
45    pub avg_price: f64,
46}
47
48/// What the agent returns.
49#[derive(Clone, Debug, Serialize, Deserialize)]
50pub struct Decision {
51    pub orders: Vec<Order>,
52    /// Free-text rationale, captured into the trajectory for auditability.
53    #[serde(default)]
54    pub reasoning: String,
55}
56
57/// A single per-instrument instruction.
58#[derive(Clone, Debug, Serialize, Deserialize)]
59pub struct Order {
60    pub symbol: String,
61    pub action: Action,
62    /// Target portfolio weight for this symbol in [0, 1] (signed for shorts).
63    pub target_weight: f64,
64    /// Stated conviction in [0, 1]; scored for calibration.
65    #[serde(default = "default_confidence")]
66    pub confidence: f64,
67    /// Optional one-line rationale for *this* order, captured into the run trace
68    /// (audit trail). Defaults to empty so existing agents need no change.
69    #[serde(default)]
70    pub rationale: String,
71}
72
73/// Discrete action label (sizing is carried by `target_weight`).
74#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
75#[serde(rename_all = "snake_case")]
76pub enum Action {
77    Buy,
78    Sell,
79    Hold,
80    Close,
81}
82
/// Serde fallback for an order's `confidence` when the agent leaves it out:
/// the midpoint of the documented `[0, 1]` conviction scale.
fn default_confidence() -> f64 {
    const NEUTRAL_CONFIDENCE: f64 = 0.5;
    NEUTRAL_CONFIDENCE
}
86
87/// One captured decision step of a single backtest run: the agent's *raw* output
88/// at one point-in-time observation. This is the persisted artifact — it holds the
89/// agent's [`Decision`] (orders, sizing, conviction, reasoning) tagged with the
90/// observation it was made against, and deliberately stores **no** returns, NAV, or
91/// any self-reported metric. The score is recomputed by replaying these decisions
92/// through the engine, never read from the agent's word.
93#[derive(Clone, Debug, Serialize, Deserialize)]
94pub struct DecisionStep {
95    /// 0-based step index within the run's window (`window.start + step` is the
96    /// dataset index the observation was drawn from).
97    pub step: usize,
98    /// Stable id of the point-in-time observation this decision answered — the
99    /// observation's ISO date. Lets a verifier confirm the decision lines up with
100    /// the frozen dataset's bar at the replayed step.
101    pub observation_id: String,
102    /// The agent's raw decision at this step (orders + reasoning).
103    pub decision: Decision,
104}
105
106/// One captured backtest run (a single window × seed): the ordered sequence of the
107/// agent's raw decision steps, plus the (window, seed) coordinates needed to replay
108/// it through the identical point-in-time engine path.
109#[derive(Clone, Debug, Serialize, Deserialize)]
110pub struct RunTrajectory {
111    /// Inclusive window start (dataset index of the first decision step).
112    pub window_start: usize,
113    /// Exclusive window end.
114    pub window_end: usize,
115    /// Execution seed the run was driven with (governs slippage noise on replay).
116    pub seed: u64,
117    /// The raw decisions, in step order.
118    pub steps: Vec<DecisionStep>,
119}
120
121/// An agent's full captured trajectory: every (window × seed) run's raw decisions.
122/// Serde-(de)serializable to JSON; this is the on-disk artifact a separate verifier
123/// ingests to recompute the score from raw decisions alone.
124#[derive(Clone, Debug, Serialize, Deserialize)]
125pub struct AgentTrajectory {
126    pub agent_id: String,
127    /// In-sample search budget the agent declared (mirrors `AgentSubmission`), so a
128    /// recomputed submission carries the same deflation footprint.
129    #[serde(default)]
130    pub in_sample_trials: u32,
131    /// One captured run per (window, seed), in the same order the harness produced
132    /// them (window-major: all seeds of window 0, then window 1, …).
133    pub runs: Vec<RunTrajectory>,
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `value` to JSON and parses the text straight back.
    fn roundtrip<T>(value: &T) -> T
    where
        T: serde::Serialize + serde::de::DeserializeOwned,
    {
        let json = serde_json::to_string(value).expect("protocol types serialize to JSON");
        serde_json::from_str(&json).expect("freshly serialized JSON parses back")
    }

    /// Builds a one-order decision used by several tests.
    fn sample_decision(target_weight: f64, confidence: f64, rationale: &str) -> Decision {
        Decision {
            orders: vec![Order {
                symbol: "A".to_string(),
                action: Action::Buy,
                target_weight,
                confidence,
                rationale: rationale.to_string(),
            }],
            reasoning: "r".to_string(),
        }
    }

    #[test]
    fn observation_and_decision_roundtrip() {
        let obs = MarketObservation {
            date: "2025-01-01".to_string(),
            cash: 1.0,
            symbols: vec![SymbolSnapshot {
                symbol: "A".to_string(),
                close_history: vec![1.0, 2.0],
                fundamentals: Default::default(),
                news: vec!["headline".to_string()],
            }],
            portfolio: vec![PositionState {
                symbol: "A".to_string(),
                shares: 1.0,
                avg_price: 2.0,
            }],
        };
        let back = roundtrip(&obs);
        assert_eq!(back.symbols[0].symbol, "A");

        let db = roundtrip(&sample_decision(0.5, 0.9, "trailing breakout"));
        assert_eq!(db.orders[0].action, Action::Buy);
        // The per-order rationale survives the JSON round-trip into the trajectory.
        assert_eq!(db.orders[0].rationale, "trailing breakout");

        // Older agents that omit `rationale`, `confidence` and `reasoning` still
        // deserialize, each falling back to its documented default.
        let legacy = r#"{"orders":[{"symbol":"A","action":"buy","target_weight":0.5}]}"#;
        let parsed: Decision = serde_json::from_str(legacy).unwrap();
        assert_eq!(parsed.orders[0].rationale, "");
        assert!((parsed.orders[0].confidence - 0.5).abs() < 1e-12);
        assert_eq!(parsed.reasoning, "");
    }

    #[test]
    fn snapshot_optional_fields_default_to_empty() {
        // `fundamentals` and `news` are `#[serde(default)]`, so a harness may omit them.
        let minimal = r#"{"symbol":"A","close_history":[1.0,2.0]}"#;
        let parsed: SymbolSnapshot = serde_json::from_str(minimal).unwrap();
        assert_eq!(parsed.symbol, "A");
        assert_eq!(parsed.close_history.len(), 2);
        assert!(parsed.fundamentals.is_empty());
        assert!(parsed.news.is_empty());
    }

    #[test]
    fn action_uses_snake_case_wire_names() {
        // The wire names are part of the language-agnostic contract; pin them so a
        // variant rename cannot silently break external agents.
        let cases = [
            (Action::Buy, r#""buy""#),
            (Action::Sell, r#""sell""#),
            (Action::Hold, r#""hold""#),
            (Action::Close, r#""close""#),
        ];
        for &(action, wire) in &cases {
            assert_eq!(serde_json::to_string(&action).unwrap(), wire);
            assert_eq!(serde_json::from_str::<Action>(wire).unwrap(), action);
        }
    }

    #[test]
    fn trajectory_roundtrips_through_json() {
        let traj = AgentTrajectory {
            agent_id: "a".to_string(),
            in_sample_trials: 7,
            runs: vec![RunTrajectory {
                window_start: 20,
                window_end: 30,
                seed: 3,
                steps: vec![DecisionStep {
                    step: 0,
                    observation_id: "2025-001".to_string(),
                    decision: sample_decision(0.25, 0.8, ""),
                }],
            }],
        };
        let back = roundtrip(&traj);
        assert_eq!(back.agent_id, "a");
        assert_eq!(back.in_sample_trials, 7);
        assert_eq!(back.runs[0].window_start, 20);
        assert_eq!(back.runs[0].window_end, 30);
        assert_eq!(back.runs[0].seed, 3);
        assert_eq!(back.runs[0].steps[0].observation_id, "2025-001");
        // Tolerance comparison, matching the confidence check in the decision test.
        let weight = back.runs[0].steps[0].decision.orders[0].target_weight;
        assert!((weight - 0.25).abs() < 1e-12);
    }

    #[test]
    fn trajectory_in_sample_trials_defaults_to_zero() {
        // `in_sample_trials` is `#[serde(default)]`; artifacts without it must still load.
        let parsed: AgentTrajectory =
            serde_json::from_str(r#"{"agent_id":"a","runs":[]}"#).unwrap();
        assert_eq!(parsed.in_sample_trials, 0);
        assert!(parsed.runs.is_empty());
    }
}