// split_brain_harness/capability.rs

//! Phase 1 — design-only schema for the Ephemeral Tool Forge.
//!
//! The model may emit a `capability_request` field alongside its telemetry output.
//! No tool is generated or executed in Phase 1. The request is parsed, traced,
//! and stored in `HarnessResult`. The execution supervisor lives in a later phase.
//!
//! Reference: EPHEMERAL_TOOL_FORGE_DESIGN.md
8use serde::{Deserialize, Serialize};
9
// ---------------------------------------------------------------------------
// Model output — what the model emits when it requests a capability
// ---------------------------------------------------------------------------
13
14/// Constraints the model declares on the tool it is requesting.
15/// All fields default to the most restrictive value if absent.
16#[derive(Debug, Serialize, Deserialize, Clone)]
17#[serde(deny_unknown_fields)]
18pub struct CapabilityConstraints {
19    #[serde(default = "default_true")]
20    pub no_network: bool,
21    #[serde(default = "default_true")]
22    pub read_only_input: bool,
23    #[serde(default = "default_runtime_ms")]
24    pub max_runtime_ms: u64,
25    #[serde(default = "default_memory_mb")]
26    pub max_memory_mb: u64,
27}
28
/// Serde fallback for the boolean constraint flags: an omitted flag is
/// treated as set.
fn default_true() -> bool {
    true
}
/// Serde fallback for `max_runtime_ms` when the model omits it.
fn default_runtime_ms() -> u64 {
    1_000
}
/// Serde fallback for `max_memory_mb` when the model omits it.
fn default_memory_mb() -> u64 {
    64
}
38
39impl Default for CapabilityConstraints {
40    fn default() -> Self {
41        Self {
42            no_network: true,
43            read_only_input: true,
44            max_runtime_ms: 1000,
45            max_memory_mb: 64,
46        }
47    }
48}
49
50/// A structured request the model emits when text reasoning is genuinely
51/// insufficient for a computational task. The model NEVER generates or runs
52/// code — it only describes what it needs.
53///
54/// The supervisor decides whether to fulfil the request.
55#[derive(Debug, Serialize, Deserialize, Clone)]
56#[serde(deny_unknown_fields)]
57pub struct CapabilityRequest {
58    /// Must be "capability_request" — validated after parse.
59    pub kind: String,
60    /// Short identifier for the capability class (e.g., "stream_parse_logs").
61    pub capability: String,
62    /// Human-readable description of the expected input format.
63    pub input_contract: String,
64    /// Human-readable description of the expected output format.
65    pub output_contract: String,
66    #[serde(default)]
67    pub constraints: CapabilityConstraints,
68    /// Why text reasoning alone is insufficient for this task.
69    pub reason: String,
70}
71
72impl CapabilityRequest {
73    /// Returns an error if the request is structurally invalid or exceeds
74    /// field-length limits (guards against oversized strings from the model).
75    pub fn validate(&self) -> Result<(), String> {
76        if self.kind != "capability_request" {
77            return Err(format!(
78                "kind must be \"capability_request\", got {:?}",
79                self.kind
80            ));
81        }
82        if self.capability.trim().is_empty() {
83            return Err("capability must not be empty".into());
84        }
85        if self.reason.trim().is_empty() {
86            return Err("reason must not be empty".into());
87        }
88        if self.capability.len() > crate::input_validation::MAX_CAPABILITY_NAME_BYTES {
89            return Err(format!(
90                "capability too long: {} bytes (max {})",
91                self.capability.len(),
92                crate::input_validation::MAX_CAPABILITY_NAME_BYTES
93            ));
94        }
95        if self.reason.len() > crate::input_validation::MAX_REASON_BYTES {
96            return Err(format!(
97                "reason too long: {} bytes (max {})",
98                self.reason.len(),
99                crate::input_validation::MAX_REASON_BYTES
100            ));
101        }
102        if self.input_contract.len() > crate::input_validation::MAX_CONTRACT_BYTES {
103            return Err(format!(
104                "input_contract too long: {} bytes (max {})",
105                self.input_contract.len(),
106                crate::input_validation::MAX_CONTRACT_BYTES
107            ));
108        }
109        if self.output_contract.len() > crate::input_validation::MAX_CONTRACT_BYTES {
110            return Err(format!(
111                "output_contract too long: {} bytes (max {})",
112                self.output_contract.len(),
113                crate::input_validation::MAX_CONTRACT_BYTES
114            ));
115        }
116        Ok(())
117    }
118}
119
// ---------------------------------------------------------------------------
// Supervisor-side types — Phase 2 additions
// ---------------------------------------------------------------------------
123
124/// Measured execution metrics from one tool run (mock or real).
125#[derive(Debug, Serialize, Deserialize, Clone, Default)]
126pub struct ToolMetrics {
127    pub runtime_ms: u64,
128    pub input_bytes: usize,
129    pub output_bytes: usize,
130    pub success: bool,
131}
132
133/// A policy rule that was violated. Returned as a list from policy::check_request.
134#[derive(Debug, Serialize, Deserialize, Clone)]
135pub struct PolicyViolation {
136    pub rule: String,
137    pub detail: String,
138}
139
140/// Per-session cost budget enforced by the supervisor.
141#[derive(Debug, Serialize, Deserialize, Clone)]
142pub struct Budget {
143    /// Maximum distinct tool invocations per session.
144    pub max_tools_per_session: usize,
145    /// Cumulative wall-clock ms across all runs in this session.
146    pub max_total_runtime_ms: u64,
147    /// Require explicit approval after this many consecutive failures.
148    pub require_approval_after_failures: usize,
149}
150
151impl Default for Budget {
152    fn default() -> Self {
153        Self {
154            max_tools_per_session: 4,
155            max_total_runtime_ms: 30_000,
156            require_approval_after_failures: 2,
157        }
158    }
159}
160
161/// Full result returned by the supervisor after processing one CapabilityRequest.
162#[derive(Debug, Serialize, Deserialize, Clone)]
163pub struct ToolRunReport {
164    /// True if the request passed policy checks and a mock was found.
165    pub accepted: bool,
166    /// Non-empty when the request was rejected — each entry is one violation.
167    pub rejection_reasons: Vec<String>,
168    /// Always true for Phase 2 mocks (no generated source to verify yet).
169    pub verification_passed: bool,
170    /// True if the mock function ran to completion (even if it returned an error).
171    pub executed: bool,
172    /// Stdout of the mock tool — a JSON string on success, None on rejection.
173    pub output: Option<String>,
174    pub metrics: ToolMetrics,
175    /// Always true after execution — marks the lifecycle as complete.
176    pub destroyed: bool,
177    /// Set when the run succeeds and memory was updated.
178    pub memory_update: Option<CapabilityMemoryRecord>,
179}
180
// ---------------------------------------------------------------------------
// Supervisor-side types — design-only in Phase 1 (manifests, permissions, limits)
// ---------------------------------------------------------------------------
184
185/// Verification steps the supervisor must run before a tool may execute.
186#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
187#[serde(rename_all = "snake_case")]
188pub enum VerificationKind {
189    StaticAnalysis,
190    DependencyScan,
191    PolicyCheck,
192    UnitTests,
193    ResourceEstimate,
194}
195
196/// Fine-grained permission set attached to a generated tool manifest.
197#[derive(Debug, Serialize, Deserialize, Clone)]
198pub struct Permissions {
199    #[serde(default)]
200    pub network: bool,
201    #[serde(default)]
202    pub filesystem_write: bool,
203    /// "none" | "sandbox/input_only" | an explicit allowlisted path
204    pub filesystem_read: String,
205    #[serde(default)]
206    pub process_spawn: bool,
207    #[serde(default)]
208    pub env_access: bool,
209}
210
211/// Hard resource limits enforced by the sandbox runtime.
212#[derive(Debug, Serialize, Deserialize, Clone)]
213pub struct ResourceLimits {
214    pub runtime_ms: u64,
215    pub memory_mb: u64,
216    pub stdout_bytes: u64,
217    pub stderr_bytes: u64,
218}
219
220/// Full manifest created by the tool architect (Phase 2+).
221/// In Phase 1 this is a data type only — no generation logic exists yet.
222#[derive(Debug, Serialize, Deserialize, Clone)]
223pub struct CapabilityManifest {
224    pub manifest_version: u32,
225    pub capability_id: String,
226    pub problem_signature: String,
227    pub tool_kind: String,
228    pub input_contract: String,
229    pub output_contract: String,
230    pub permissions: Permissions,
231    pub limits: ResourceLimits,
232    pub verification_required: Vec<VerificationKind>,
233    pub destroy_after_run: bool,
234}
235
236/// Capability memory entry — fingerprint stored after a run.
237/// The binary is destroyed; only the pattern, constraints, and metrics survive.
238#[derive(Debug, Serialize, Deserialize, Clone)]
239pub struct CapabilityMemoryRecord {
240    pub problem_signature: String,
241    pub solution_pattern: String,
242    pub input_shape: String,
243    pub output_shape: String,
244    pub constraints: CapabilityConstraints,
245}
246
// ---------------------------------------------------------------------------
// Extraction adapter — used in harness.rs to wrap TelemetryResult
// ---------------------------------------------------------------------------
250
251/// Wraps the model's full propose-stage output. The telemetry fields are at
252/// the top level (flattened) so existing model responses without
253/// capability_request still parse unchanged.
254#[derive(Debug, Deserialize, Clone)]
255pub struct ModelProposalOutput {
256    #[serde(flatten)]
257    pub telemetry: crate::types::TelemetryResult,
258    #[serde(default)]
259    pub capability_request: Option<CapabilityRequest>,
260}
261
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed request with every constraint spelled out explicitly.
    const VALID_REQUEST_JSON: &str = r#"{
            "kind": "capability_request",
            "capability": "stream_parse_logs",
            "input_contract": "UTF-8 log lines from stdin",
            "output_contract": "JSON array of matching events",
            "constraints": {
                "no_network": true,
                "read_only_input": true,
                "max_runtime_ms": 1000,
                "max_memory_mb": 64
            },
            "reason": "Existing text reasoning is inefficient for repeated regex parsing."
        }"#;

    /// Parses `raw` as a `CapabilityRequest`, panicking on malformed input.
    fn parse_request(raw: &str) -> CapabilityRequest {
        serde_json::from_str(raw).unwrap()
    }

    /// Parses `raw` as a `ModelProposalOutput`, panicking on malformed input.
    fn parse_proposal(raw: &str) -> ModelProposalOutput {
        serde_json::from_str(raw).unwrap()
    }

    #[test]
    fn capability_request_parses_from_valid_json() {
        let parsed = parse_request(VALID_REQUEST_JSON);
        assert_eq!(parsed.kind, "capability_request");
        assert_eq!(parsed.capability, "stream_parse_logs");
        assert!(parsed.constraints.no_network);
        assert_eq!(parsed.constraints.max_runtime_ms, 1000);
    }

    #[test]
    fn capability_request_validates_kind() {
        let tampered = CapabilityRequest {
            kind: "wrong_kind".into(),
            ..parse_request(VALID_REQUEST_JSON)
        };
        assert!(tampered.validate().is_err());
    }

    #[test]
    fn capability_request_validates_empty_capability() {
        // Whitespace-only counts as empty.
        let tampered = CapabilityRequest {
            capability: "  ".into(),
            ..parse_request(VALID_REQUEST_JSON)
        };
        assert!(tampered.validate().is_err());
    }

    #[test]
    fn capability_request_validates_empty_reason() {
        let tampered = CapabilityRequest {
            reason: String::new(),
            ..parse_request(VALID_REQUEST_JSON)
        };
        assert!(tampered.validate().is_err());
    }

    #[test]
    fn capability_request_constraints_default_restrictive() {
        // No "constraints" key at all: every constraint must fall back to its default.
        let raw = r#"{
            "kind": "capability_request",
            "capability": "test",
            "input_contract": "x",
            "output_contract": "y",
            "reason": "z"
        }"#;
        let constraints = parse_request(raw).constraints;
        assert!(constraints.no_network, "default must be no_network=true");
        assert!(
            constraints.read_only_input,
            "default must be read_only=true"
        );
        assert_eq!(constraints.max_runtime_ms, 1000);
        assert_eq!(constraints.max_memory_mb, 64);
    }

    #[test]
    fn model_proposal_output_parses_telemetry_only() {
        let raw = r#"{
            "affective_telemetry": {
                "primary_emotion": "neutral",
                "emotional_intensity": 0.1,
                "structural_tone": ["analytical"]
            },
            "intent_matrix": {
                "stated_objective": "test",
                "subtextual_motive": "test",
                "manipulation_risk": "low"
            },
            "cognitive_state": {
                "urgency_vector": 0.0,
                "coherence_rating": 0.95
            }
        }"#;
        let proposal = parse_proposal(raw);
        assert!(
            proposal.capability_request.is_none(),
            "capability_request must be absent when not emitted"
        );
        assert_eq!(proposal.telemetry.intent_matrix.manipulation_risk, "low");
    }

    #[test]
    fn model_proposal_output_parses_telemetry_with_capability_request() {
        let raw = r#"{
            "affective_telemetry": {
                "primary_emotion": "neutral",
                "emotional_intensity": 0.1,
                "structural_tone": ["analytical"]
            },
            "intent_matrix": {
                "stated_objective": "parse 10GB log file",
                "subtextual_motive": "efficiency",
                "manipulation_risk": "low"
            },
            "cognitive_state": {
                "urgency_vector": 0.2,
                "coherence_rating": 0.95
            },
            "capability_request": {
                "kind": "capability_request",
                "capability": "stream_parse_logs",
                "input_contract": "UTF-8 log lines",
                "output_contract": "JSON events",
                "constraints": {
                    "no_network": true,
                    "read_only_input": true,
                    "max_runtime_ms": 2000,
                    "max_memory_mb": 128
                },
                "reason": "10GB file cannot be reasoned over line-by-line in a single context window."
            }
        }"#;
        let request = parse_proposal(raw).capability_request.unwrap();
        assert_eq!(request.capability, "stream_parse_logs");
        assert_eq!(request.constraints.max_memory_mb, 128);
        assert!(request.validate().is_ok());
    }

    #[test]
    fn verification_kind_roundtrips() {
        let every_kind = [
            VerificationKind::StaticAnalysis,
            VerificationKind::DependencyScan,
            VerificationKind::PolicyCheck,
            VerificationKind::UnitTests,
            VerificationKind::ResourceEstimate,
        ];
        for kind in every_kind.iter() {
            let encoded = serde_json::to_string(kind).unwrap();
            let decoded: VerificationKind = serde_json::from_str(&encoded).unwrap();
            assert_eq!(kind, &decoded);
        }
    }
}