Skip to main content

split_brain_harness/
extractor.rs

1use anyhow::{anyhow, Result};
2use serde::de::DeserializeOwned;
3
4/// Pull the first valid JSON object of type T out of raw model output.
5///
6/// Handles (in order):
7///   1. <think>...</think> blocks emitted by reasoning models
8///   2. ```json ... ``` and ``` ... ``` markdown fences
9///   3. Leading prose before the opening brace
10///   4. Trailing text or a second JSON object after the first closes
11pub fn extract<T: DeserializeOwned>(raw: &str) -> Result<T> {
12    let step1 = strip_think_blocks(raw);
13    let step2 = strip_fences(&step1);
14
15    let from_brace = step2.find('{').map(|i| &step2[i..]).ok_or_else(|| {
16        anyhow!(
17            "no JSON object in model response. First 200 chars: {:?}",
18            &raw[..raw.len().min(200)]
19        )
20    })?;
21
22    // StreamDeserializer stops at the end of the first complete JSON value
23    // and ignores anything that follows.
24    let mut stream = serde_json::Deserializer::from_str(from_brace).into_iter::<T>();
25
26    stream
27        .next()
28        .ok_or_else(|| anyhow!("model returned an empty response"))?
29        .map_err(|e| {
30            anyhow!(
31                "JSON schema mismatch: {}. Raw snippet: {:?}",
32                e,
33                &from_brace[..from_brace.len().min(300)]
34            )
35        })
36}
37
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
41
/// Drop every `<think>...</think>` span from `s` and return what is left.
///
/// Text outside the spans is copied through unchanged. If an opening tag has
/// no closing tag after it, everything from that opening tag onward is
/// discarded.
fn strip_think_blocks(s: &str) -> String {
    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    let mut kept = String::with_capacity(s.len());
    let mut remaining = s;
    loop {
        match remaining.split_once(OPEN) {
            // No further opening tag: the tail is ordinary text.
            None => {
                kept.push_str(remaining);
                return kept;
            }
            Some((before, after_open)) => {
                kept.push_str(before);
                match after_open.split_once(CLOSE) {
                    // Resume scanning right after the closing tag.
                    Some((_, after_close)) => remaining = after_close,
                    // Unclosed block: the rest of the input is dropped.
                    None => return kept,
                }
            }
        }
    }
}
56
/// Unwrap a markdown code fence that surrounds the whole (trimmed) input.
///
/// When the trimmed input starts with a triple-backtick fence, the first
/// line (the fence plus any language tag) is dropped and the text up to the
/// last triple-backtick that follows is returned, trimmed. If no closing
/// fence follows, the rest of the text is returned, trimmed. Input that does
/// not start with a fence, or that has no newline after it, is returned
/// trimmed and otherwise untouched.
fn strip_fences(s: &str) -> String {
    let text = s.trim();
    if text.starts_with("```") {
        if let Some((_fence_line, body)) = text.split_once('\n') {
            // Cut at the last fence marker, if any, so text after the
            // closing fence is discarded as well.
            let inner = body.rfind("```").map_or(body, |end| &body[..end]);
            return inner.trim().to_string();
        }
    }
    text.to_string()
}
72
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
76
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::TelemetryResult;

    /// A payload the tests expect to deserialize into `TelemetryResult`.
    const GOOD_JSON: &str = r#"{
  "affective_telemetry": {
    "primary_emotion": "neutral",
    "emotional_intensity": 0.1,
    "structural_tone": ["analytical"]
  },
  "intent_matrix": {
    "stated_objective": "user wants weather information today",
    "subtextual_motive": "routine informational query",
    "manipulation_risk": "low"
  },
  "cognitive_state": {
    "urgency_vector": 0.0,
    "coherence_rating": 0.95
  }
}"#;

    /// Run the extractor against `input`, targeting `TelemetryResult`.
    fn parse(input: &str) -> Result<TelemetryResult> {
        extract::<TelemetryResult>(input)
    }

    #[test]
    fn parses_clean_json() {
        parse(GOOD_JSON).expect("clean JSON should parse");
    }

    #[test]
    fn strips_markdown_fence() {
        let input = format!("```json\n{}\n```", GOOD_JSON);
        parse(&input).expect("fenced JSON should parse");
    }

    #[test]
    fn strips_think_blocks() {
        let input = format!("<think>some reasoning here</think>\n{}", GOOD_JSON);
        parse(&input).expect("JSON after think block should parse");
    }

    #[test]
    fn ignores_trailing_text() {
        let input = format!("{}\n\nHere is my analysis.", GOOD_JSON);
        parse(&input).expect("trailing prose should be ignored");
    }

    #[test]
    fn ignores_leading_prose() {
        let input = format!("Sure! Here is the JSON:\n{}", GOOD_JSON);
        parse(&input).expect("leading prose should be ignored");
    }

    #[test]
    fn errors_on_empty() {
        // Neither input contains an opening brace, so extraction must fail.
        assert!(parse("").is_err());
        assert!(parse("no braces here").is_err());
    }

    #[test]
    fn errors_on_schema_mismatch() {
        // Well-formed JSON that does not match the target schema.
        assert!(parse(r#"{"foo": "bar"}"#).is_err());
    }
}