// bamboo_engine/runtime/goal_state.rs

//! Durable per-session goal state for the Codex-style goal loop.
//!
//! Bamboo's goal feature is a hybrid of two ideas:
//!
//! 1. The **main agent self-reports** completion via the `update_goal` tool
//!    (mirroring OpenAI Codex's `update_goal`), and the runtime relentlessly
//!    re-injects a rigorous completion-audit continuation prompt until the agent
//!    declares the goal complete (or blocked, or the continuation budget runs
//!    out).
//! 2. The existing **Gold evaluator** is kept as a *side-channel double-check*:
//!    at the terminal point — when the run is actually about to stop — it
//!    re-verifies achievement and can veto a premature completion.
//!
//! This module owns the durable record that ties the two together. It lives in
//! `session.metadata` under [`GOAL_STATE_METADATA_KEY`] as a single JSON value
//! (the established Bamboo pattern for structured, session-scoped state — see
//! `gold_config` and the `gold.*` keys), so it round-trips through the normal
//! session save/load path with no new storage entity.
//!
//! Crucially, the double-check verdicts are persisted into
//! [`GoalState::eval_history`] so the goal record carries its own evaluation
//! trail ("goal 持久化也要加入评测内容" — "goal persistence must also include
//! the evaluation content").
23
24use bamboo_agent_core::Session;
25use chrono::Utc;
26use serde::{Deserialize, Serialize};
27
28use crate::runtime::gold_evaluation::GoldEvaluationResult;
29
/// Session metadata key holding the serialized [`GoalState`] JSON blob.
///
/// [`read_goal_state`] looks the blob up under this key and
/// [`write_goal_state`] stores it there.
pub const GOAL_STATE_METADATA_KEY: &str = "goal.state";
32
/// Upper bound on retained evaluation records, so a long autonomous run cannot
/// grow the persisted blob without limit. The newest records are kept:
/// [`GoalState::push_eval`] drops entries from the front once this is exceeded.
const MAX_EVAL_HISTORY: usize = 50;
36
37/// Runtime status of the active session goal.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39#[serde(rename_all = "snake_case")]
40pub enum GoalRuntimeStatus {
41    /// The agent is actively pursuing the goal; the loop will keep continuing.
42    Active,
43    /// The goal has been achieved (agent declared + double-check confirmed, or
44    /// the evaluator was confidently achieved).
45    Complete,
46    /// The agent explicitly gave up after the blocked discipline, or the
47    /// evaluator reported a concrete blocker.
48    Blocked,
49    /// The evaluator reported that user input is the true next blocker.
50    NeedInput,
51    /// The continuation budget was exhausted before completion.
52    BudgetLimited,
53}
54
55impl GoalRuntimeStatus {
56    pub fn as_str(self) -> &'static str {
57        match self {
58            Self::Active => "active",
59            Self::Complete => "complete",
60            Self::Blocked => "blocked",
61            Self::NeedInput => "need_input",
62            Self::BudgetLimited => "budget_limited",
63        }
64    }
65
66    /// Whether the loop should keep working on this goal.
67    pub fn is_active(self) -> bool {
68        matches!(self, Self::Active)
69    }
70}
71
72/// What the agent last declared via the `update_goal` tool.
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74#[serde(rename_all = "snake_case")]
75pub enum GoalDeclaredStatus {
76    Complete,
77    Blocked,
78}
79
80impl GoalDeclaredStatus {
81    pub fn as_str(self) -> &'static str {
82        match self {
83            Self::Complete => "complete",
84            Self::Blocked => "blocked",
85        }
86    }
87}
88
89/// A single persisted double-check verdict from the Gold evaluator.
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct GoalEvalRecord {
92    pub checkpoint: String,
93    pub iteration: u32,
94    pub decision: String,
95    pub confidence: String,
96    pub reasoning: String,
97    #[serde(default, skip_serializing_if = "Vec::is_empty")]
98    pub missing_information: Vec<String>,
99    #[serde(default, skip_serializing_if = "Option::is_none")]
100    pub next_action: Option<String>,
101    pub recorded_at: String,
102}
103
104impl GoalEvalRecord {
105    /// Build a record from a Gold double-check verdict.
106    pub fn from_evaluation(result: &GoldEvaluationResult) -> Self {
107        Self {
108            checkpoint: result.checkpoint.as_str().to_string(),
109            iteration: result.iteration,
110            decision: result.decision.as_str().to_string(),
111            confidence: result.confidence.as_str().to_string(),
112            reasoning: result.reasoning.clone(),
113            missing_information: result.missing_information.clone(),
114            next_action: result.next_action.clone(),
115            recorded_at: Utc::now().to_rfc3339(),
116        }
117    }
118}
119
120/// Durable goal record persisted in `session.metadata`.
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct GoalState {
123    /// The user's objective text (copied from the active goal config).
124    pub objective: String,
125    pub status: GoalRuntimeStatus,
126    /// The agent's most recent self-report, if any. Cleared once acted upon.
127    #[serde(default, skip_serializing_if = "Option::is_none")]
128    pub declared_status: Option<GoalDeclaredStatus>,
129    #[serde(default, skip_serializing_if = "Option::is_none")]
130    pub declared_at_round: Option<u32>,
131    /// How many autonomous continuations have fired toward this goal.
132    #[serde(default)]
133    pub continuation_count: u32,
134    /// Persisted double-check verdicts (the "评测内容").
135    #[serde(default)]
136    pub eval_history: Vec<GoalEvalRecord>,
137    pub created_at: String,
138    pub updated_at: String,
139}
140
141impl GoalState {
142    fn new(objective: impl Into<String>) -> Self {
143        let now = Utc::now().to_rfc3339();
144        Self {
145            objective: objective.into(),
146            status: GoalRuntimeStatus::Active,
147            declared_status: None,
148            declared_at_round: None,
149            continuation_count: 0,
150            eval_history: Vec::new(),
151            created_at: now.clone(),
152            updated_at: now,
153        }
154    }
155
156    /// Record the agent's `update_goal` self-report.
157    pub fn declare(&mut self, status: GoalDeclaredStatus, round: u32) {
158        self.declared_status = Some(status);
159        self.declared_at_round = Some(round);
160    }
161
162    /// Clear any pending self-report (after it has been acted upon).
163    pub fn clear_declaration(&mut self) {
164        self.declared_status = None;
165        self.declared_at_round = None;
166    }
167
168    /// Append a double-check verdict, trimming to [`MAX_EVAL_HISTORY`].
169    pub fn push_eval(&mut self, record: GoalEvalRecord) {
170        self.eval_history.push(record);
171        if self.eval_history.len() > MAX_EVAL_HISTORY {
172            let overflow = self.eval_history.len() - MAX_EVAL_HISTORY;
173            self.eval_history.drain(0..overflow);
174        }
175    }
176}
177
178/// Read the persisted goal state, if present and parseable.
179pub fn read_goal_state(session: &Session) -> Option<GoalState> {
180    let raw = session.metadata.get(GOAL_STATE_METADATA_KEY)?;
181    serde_json::from_str::<GoalState>(raw).ok()
182}
183
184/// Persist the goal state into `session.metadata` (touching `updated_at`).
185pub fn write_goal_state(session: &mut Session, mut state: GoalState) {
186    state.updated_at = Utc::now().to_rfc3339();
187    match serde_json::to_string(&state) {
188        Ok(json) => {
189            session
190                .metadata
191                .insert(GOAL_STATE_METADATA_KEY.to_string(), json);
192        }
193        Err(error) => {
194            // Serializing a plain data struct effectively never fails, but if it
195            // ever does, the on-disk goal state silently goes stale — log loudly
196            // rather than swallow it.
197            tracing::warn!(
198                "failed to serialize goal state for session {}: {error}",
199                session.id
200            );
201        }
202    }
203}
204
205/// Read the existing goal state, or create a fresh one bound to `objective`.
206///
207/// When a state already exists but the objective changed (the user edited the
208/// goal), the objective is refreshed and the declaration cleared so a stale
209/// "complete" cannot leak across objectives.
210pub fn ensure_goal_state(session: &Session, objective: &str) -> GoalState {
211    match read_goal_state(session) {
212        Some(mut state) => {
213            if state.objective != objective {
214                // A new objective is a fresh goal: reset the whole runtime, not
215                // just the status — the continuation budget and eval trail
216                // belonged to the previous objective.
217                state.objective = objective.to_string();
218                state.status = GoalRuntimeStatus::Active;
219                state.continuation_count = 0;
220                state.eval_history.clear();
221                state.clear_declaration();
222            }
223            state
224        }
225        None => GoalState::new(objective),
226    }
227}
228
#[cfg(test)]
mod tests {
    use super::*;
    use bamboo_agent_core::Session;

    /// Builds a "terminal" checkpoint record with no missing information, no
    /// next action and a placeholder timestamp.
    fn terminal_record(
        iteration: u32,
        decision: &str,
        confidence: &str,
        reasoning: &str,
    ) -> GoalEvalRecord {
        GoalEvalRecord {
            checkpoint: "terminal".to_string(),
            iteration,
            decision: decision.to_string(),
            confidence: confidence.to_string(),
            reasoning: reasoning.to_string(),
            missing_information: Vec::new(),
            next_action: None,
            recorded_at: "t".to_string(),
        }
    }

    /// A state written to session metadata reads back with its fields intact.
    #[test]
    fn round_trips_through_metadata() {
        let mut session = Session::new("s1", "model");

        let mut original = GoalState::new("ship the feature");
        original.declare(GoalDeclaredStatus::Complete, 4);
        original.continuation_count = 2;
        original.push_eval(GoalEvalRecord {
            missing_information: vec!["the e2e test".to_string()],
            next_action: Some("write the e2e test".to_string()),
            recorded_at: "2026-06-15T00:00:00Z".to_string(),
            ..terminal_record(4, "continue", "high", "still missing tests")
        });
        write_goal_state(&mut session, original);

        let loaded = read_goal_state(&session).expect("state persists");

        assert_eq!(loaded.objective, "ship the feature");
        assert_eq!(loaded.declared_status, Some(GoalDeclaredStatus::Complete));
        assert_eq!(loaded.declared_at_round, Some(4));
        assert_eq!(loaded.continuation_count, 2);
        assert_eq!(loaded.eval_history.len(), 1);
        let only_record = &loaded.eval_history[0];
        assert_eq!(only_record.next_action.as_deref(), Some("write the e2e test"));
    }

    /// Changing the objective resets status, declaration, budget and trail.
    #[test]
    fn ensure_resets_when_objective_changes() {
        let mut session = Session::new("s1", "model");

        let mut previous = GoalState::new("old objective");
        previous.declare(GoalDeclaredStatus::Complete, 1);
        previous.status = GoalRuntimeStatus::Complete;
        previous.continuation_count = 2;
        previous.push_eval(terminal_record(1, "achieved", "high", "old"));
        write_goal_state(&mut session, previous);

        let refreshed = ensure_goal_state(&session, "new objective");

        assert_eq!(refreshed.objective, "new objective");
        assert_eq!(refreshed.status, GoalRuntimeStatus::Active);
        assert_eq!(refreshed.declared_status, None);
        // A new objective also resets the budget + eval trail of the old one.
        assert_eq!(refreshed.continuation_count, 0);
        assert!(refreshed.eval_history.is_empty());
    }

    /// Pushing past the cap keeps only the newest `MAX_EVAL_HISTORY` records.
    #[test]
    fn push_eval_trims_history() {
        let total = MAX_EVAL_HISTORY + 10;
        let mut state = GoalState::new("obj");
        for i in 0..total {
            state.push_eval(terminal_record(
                i as u32,
                "continue",
                "low",
                &format!("round {i}"),
            ));
        }

        assert_eq!(state.eval_history.len(), MAX_EVAL_HISTORY);
        // Oldest entries were dropped; the last one is the newest.
        let newest = state.eval_history.last().unwrap();
        assert_eq!(newest.reasoning, format!("round {}", total - 1));
    }
}