Skip to main content

harness/
event.rs

1use std::path::PathBuf;
2
3use serde_json::Value;
4use tokio_util::sync::CancellationToken;
5
6use crate::model::{ChatMessage, UserAttachment};
7
/// Token / cache usage reported by the model client at the end of a turn.
///
/// Deliberately free of any wire-format type (no `grpc::*` import) so the
/// harness crate stays a pure domain library; the `core::native_adapter`
/// layer converts this into the proto `SessionUsage` shape on the way out.
///
/// Every field is a plain `u64` counter, so equality is total and the type
/// derives `Eq` alongside `PartialEq`. `Default` yields all-zero counters.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct HarnessUsage {
    /// Total input tokens spent this turn (main steps + compaction
    /// summarize calls combined). This is the same figure the wire
    /// `SessionUsage` surfaces today, which keeps HR's existing
    /// dashboards correct.
    pub input_tokens: u64,
    /// Total output tokens this turn (main + compaction).
    pub output_tokens: u64,
    /// Cache-read tokens (Anthropic). 0 = unknown / not applicable.
    pub cache_read_input_tokens: u64,
    /// Cache-create tokens (Anthropic). 0 = unknown / not applicable.
    pub cache_creation_input_tokens: u64,
    /// Subset of `input_tokens` spent specifically on compaction
    /// `summarize` calls. 0 = no compaction ran this turn (or it ran
    /// but the provider didn't report usage).
    ///
    /// Surfaced in tracing for now; not yet wired to proto
    /// `SessionUsage`. When HR wants the breakdown on the wire, add
    /// fields to the proto and map them in
    /// `native_adapter::harness_usage_to_proto`.
    pub compaction_input_tokens: u64,
    /// Subset of `output_tokens` spent on compaction.
    pub compaction_output_tokens: u64,
}
35
36#[derive(Debug, Clone, PartialEq)]
37pub enum HarnessInternalEvent {
38    AssistantTextChunk {
39        msg_id: String,
40        delta: String,
41    },
42    AssistantThinkingChunk {
43        msg_id: String,
44        delta: String,
45    },
46    ToolCall {
47        id: String,
48        name: String,
49        input: Value,
50    },
51    ToolResult {
52        id: String,
53        output: Result<Value, String>,
54    },
55    /// Compaction fired between steps and the running history was
56    /// folded. Counts let HR estimate how aggressive compaction is at
57    /// this point in the turn (and whether to raise alerts). Token
58    /// counts come from the harness's own estimator — see
59    /// `compaction::estimate_messages_tokens` — and are approximate.
60    ///
61    /// Native_adapter currently does NOT project this onto an
62    /// `AdapterEvent` (no proto field reserved for it yet). It surfaces
63    /// only through structured tracing on the RD side; tests can still
64    /// assert on it in the harness's mpsc channel.
65    CompactionApplied {
66        original_message_count: usize,
67        compacted_message_count: usize,
68        original_tokens: u64,
69        compacted_tokens: u64,
70    },
71    TurnEnd {
72        stop_reason: String,
73        usage: Option<HarnessUsage>,
74        /// Final messages history at turn end (post-compaction, includes
75        /// the assistant's reply and any tool round-trips). RD captures
76        /// this snapshot to seed the next dispatch's `prior_messages`
77        /// so multi-turn conversations in the same RD process don't
78        /// re-prompt from scratch.
79        ///
80        /// This is **internal-only** state: it never lands on the wire.
81        /// `native_adapter::HarnessEventAdapter::ingest` reads it via a
82        /// side-channel (history_handle) and drops it before projecting
83        /// to `AdapterEvent::TurnEnd`. Tests can still assert on the
84        /// raw `HarnessInternalEvent` directly.
85        final_messages: Vec<ChatMessage>,
86    },
87}
88
89#[derive(Debug, Clone)]
90pub struct NativeTurnInput {
91    /// The user-facing prompt that triggered this turn. Lands as the first
92    /// `ChatMessage::User.content` the model sees.
93    pub prompt_text: String,
94    /// Pre-composed system prompt (e.g. `spec_snapshot.system_prompt` +
95    /// `driver.append_system_prompt`). `None` ⇒ no system message sent.
96    pub system_prompt: Option<String>,
97    /// Non-text attachments lifted from the inbound `UserMessagePayload`
98    /// content blocks. Empty Vec ⇒ pure-text prompt. Lands on the first
99    /// `ChatMessage::User.attachments` so each provider's projection
100    /// can render them as image / file content blocks.
101    pub attachments: Vec<UserAttachment>,
102    /// Optional cancellation handle. When fired, the harness loop
103    /// short-circuits at the next stream-chunk await (or before the
104    /// next step starts) and emits `TurnEnd { stop_reason: "interrupt" }`.
105    /// `None` ⇒ harness cannot be cancelled mid-flight (fine for tests
106    /// and for fire-and-forget turns); production wires this through
107    /// from RD's `active_native_cancel`.
108    pub cancel_token: Option<CancellationToken>,
109    /// Prior `messages` history for **in-memory** mode (`context_path = None`).
110    /// Ignored when `context_path` is `Some` — harness loads history from
111    /// the JSONL file instead.
112    ///
113    /// Empty Vec + `context_path = None` = fresh in-memory conversation.
114    pub prior_messages: Vec<ChatMessage>,
115
116    /// Absolute path to harness's context JSONL.
117    ///
118    /// * `Some(path)` — **persistent mode**: harness loads prior messages
119    ///   from this file at turn start (creating it on first use), appends
120    ///   new messages incrementally, and rewrites it on compaction.
121    ///   `prior_messages` is ignored. `TurnEnd.final_messages` is empty.
122    ///
123    /// * `None` — **in-memory mode**: `prior_messages` is used as the seed;
124    ///   harness never touches the filesystem. `TurnEnd.final_messages`
125    ///   carries the full history snapshot for the caller to persist.
126    ///   Suitable for runtime-driver (manages history in RAM) and tests.
127    pub context_path: Option<PathBuf>,
128}
129
130impl PartialEq for NativeTurnInput {
131    fn eq(&self, other: &Self) -> bool {
132        // CancellationToken doesn't implement PartialEq (it's a runtime
133        // handle, not a value). Two NativeTurnInputs are considered
134        // equal iff the *value-typed* fields match; the cancel handle
135        // is opaque ambient state.
136        self.prompt_text == other.prompt_text
137            && self.system_prompt == other.system_prompt
138            && self.attachments == other.attachments
139            && self.prior_messages == other.prior_messages
140            && self.context_path == other.context_path
141    }
142}
143
144/// Categorised native-path failure. Each variant maps 1:1 to an
145/// `acpx::NativeFaultCategory` in `core::native_adapter::native_error_to_invoker`,
146/// which is in turn projected to a `RuntimeError` by `core::error::
147/// native_fault_to_runtime_error`. Keeping the buckets named here lets the
148/// harness crate produce structured errors without taking on any grpc /
149/// proto dependency.
150#[derive(Debug, thiserror::Error)]
151pub enum NativeHarnessError {
152    #[error("native harness failed: {0}")]
153    Failed(String),
154
155    #[error("native harness event encode failed: {0}")]
156    Encode(String),
157
158    #[error("native harness channel closed")]
159    ChannelClosed,
160
161    /// LLM provider returned 429.
162    #[error("model rate limit: {0}")]
163    ModelRateLimit(String),
164
165    /// LLM provider returned 401 / 403.
166    #[error("model auth error: {0}")]
167    ModelAuth(String),
168
169    /// Prompt + completion exceeded the model's context window.
170    #[error("model context overflow: {0}")]
171    ModelContextOverflow(String),
172
173    /// Transport-level failure (DNS / TCP / TLS / truncated body).
174    #[error("model network error: {0}")]
175    ModelNetwork(String),
176
177    /// HTTP 400 that is a config error (wrong model name, invalid params).
178    /// Not retryable — the caller must fix their configuration.
179    #[error("model bad request: {0}")]
180    ModelBadRequest(String),
181
182    /// HTTP 5xx or transient server error. Retryable.
183    #[error("model server error: {0}")]
184    ModelServerError(String),
185
186    /// Any other model-side failure not captured above.
187    #[error("model other error: {0}")]
188    ModelOther(String),
189
190    /// Sandbox / tool runtime hard error (process spawn refused, envd
191    /// unreachable). NOT a `ToolFailure` — those are domain failures the
192    /// model can observe and recover from; this is RD-side infrastructure
193    /// breaking under the tool.
194    #[error("tool runtime error: {0}")]
195    ToolRuntime(String),
196}