tdln_brain/
parser.rs

1//! Strict JSON extraction and validation.
2//!
3//! Extracts a single JSON object from LLM output (supporting markdown blocks
4//! and inline JSON), then validates it into a [`SemanticUnit`].
5
6use crate::{BrainError, Decision, UsageMeta};
7use serde_json::Value;
8use std::collections::BTreeMap;
9use tdln_ast::SemanticUnit;
10
11/// Parse a raw LLM response into a strict [`Decision`].
12///
13/// # Errors
14///
15/// Returns `BrainError::Hallucination` if the output cannot be parsed
16/// into a valid `SemanticUnit`.
17pub fn parse_decision(raw: &str, meta: UsageMeta) -> Result<Decision, BrainError> {
18    let (json_str, reasoning) = extract_json_block(raw);
19
20    // Parse as generic JSON first
21    let value: Value = serde_json::from_str(&json_str).map_err(|e| {
22        BrainError::Hallucination(format!("Invalid JSON: {e}; input: {json_str}"))
23    })?;
24
25    // Extract kind (required)
26    let kind = value
27        .get("kind")
28        .and_then(Value::as_str)
29        .ok_or_else(|| BrainError::Hallucination("missing 'kind' field".into()))?
30        .to_string();
31
32    // Extract slots (optional, default to empty)
33    let slots: BTreeMap<String, Value> = value
34        .get("slots")
35        .cloned()
36        .map(|v| {
37            serde_json::from_value(v)
38                .map_err(|e| BrainError::Hallucination(format!("invalid slots: {e}")))
39        })
40        .transpose()?
41        .unwrap_or_default();
42
43    // Compute source_hash from the JSON string
44    let source_hash: [u8; 32] = blake3::hash(json_str.as_bytes()).into();
45
46    let intent = SemanticUnit {
47        kind,
48        slots,
49        source_hash,
50    };
51
52    Ok(Decision {
53        reasoning,
54        intent,
55        meta,
56    })
57}
58
/// Extract a JSON block from raw text.
///
/// Strategies, tried in order:
///
/// 1. Fenced block: an opening ` ```json ` followed by a closing ` ``` `.
///    The trimmed fence contents are the JSON.
/// 2. Object span: everything from the first `{` to the last `}` in the text.
///    This covers bare objects and JSON embedded in prose. The span is not
///    validated here; malformed spans are rejected by the caller's parser.
/// 3. Fallback: the entire text, unchanged.
///
/// Returns `(json_string, optional_reasoning)`. The reasoning is the trimmed
/// prose that precedes the JSON, or `None` when there is none. On the
/// object-span path a dangling fence opener directly before the object (an
/// untagged ` ``` `, a differently-cased tag such as ` ```JSON `, or a
/// ` ```json ` whose closing fence is missing) is markup rather than
/// reasoning, so it is stripped before the emptiness check.
fn extract_json_block(text: &str) -> (String, Option<String>) {
    const FENCE: &str = "```";
    const JSON_FENCE: &str = "```json";

    // Try: markdown fenced block
    if let Some(open) = text.find(JSON_FENCE) {
        let body_start = open + JSON_FENCE.len();
        if let Some(close_rel) = text[body_start..].find(FENCE) {
            let json = text[body_start..body_start + close_rel].trim().to_string();
            return (json, non_empty_prose(&text[..open]));
        }
    }

    // Try: find JSON object span
    if let (Some(first), Some(last)) = (text.find('{'), text.rfind('}')) {
        if last > first {
            let json = text[first..=last].to_string();
            let prose = strip_dangling_fence(&text[..first]);
            return (json, non_empty_prose(prose));
        }
    }

    // Fallback: entire text
    (text.to_string(), None)
}

/// Trim `prose` and return it as owned reasoning, or `None` if nothing is left.
fn non_empty_prose(prose: &str) -> Option<String> {
    let prose = prose.trim();
    if prose.is_empty() {
        None
    } else {
        Some(prose.to_string())
    }
}

/// Drop a trailing code-fence opener (three or more backticks plus an optional
/// alphanumeric language tag) from the end of `prose`.
///
/// Fences that are followed by anything else (e.g. an earlier, already closed
/// inline fence in the middle of a sentence) are left untouched.
fn strip_dangling_fence(prose: &str) -> &str {
    let prose = prose.trim_end();
    match prose.rfind("```") {
        Some(idx) if prose[idx + 3..].chars().all(|c| c.is_ascii_alphanumeric()) => {
            // `rfind` lands on the last three backticks; remove any extra
            // backticks of a longer fence as well.
            prose[..idx].trim_end_matches('`')
        }
        _ => prose,
    }
}
107
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `parse_decision` with default usage metadata.
    fn decide(raw: &str) -> Result<Decision, BrainError> {
        parse_decision(raw, UsageMeta::default())
    }

    // --- extract_json_block -------------------------------------------------

    /// A bare object is returned verbatim with no reasoning.
    #[test]
    fn extract_clean_json() {
        let input = r#"{"kind":"test","slots":{}}"#;
        let (json, reasoning) = extract_json_block(input);
        assert_eq!(json, r#"{"kind":"test","slots":{}}"#);
        assert_eq!(reasoning, None);
    }

    /// A ```json fence yields its contents, with the leading prose as reasoning.
    #[test]
    fn extract_markdown_block() {
        let input = "thinking...\n```json\n{\"kind\":\"x\",\"slots\":{}}\n```\n";
        let (json, reasoning) = extract_json_block(input);
        assert!(json.contains(r#""kind":"x""#));
        assert_eq!(reasoning.as_deref(), Some("thinking..."));
    }

    /// An object embedded after prose is found, and the prose is kept.
    #[test]
    fn extract_json_with_prose() {
        let input = r#"I'll create a grant. {"kind":"grant","slots":{"to":"bob"}}"#;
        let (json, reasoning) = extract_json_block(input);
        assert!(json.contains("grant"));
        assert!(reasoning.is_some());
    }

    // --- parse_decision -----------------------------------------------------

    /// A minimal well-formed unit parses and carries no reasoning.
    #[test]
    fn parses_valid_semantic_unit() {
        let decision = decide(r#"{"kind":"noop","slots":{}}"#).unwrap();
        assert_eq!(decision.intent.kind, "noop");
        assert!(decision.reasoning.is_none());
    }

    /// Markdown-wrapped output parses, and the preamble becomes the reasoning.
    #[test]
    fn parses_markdown_wrapped() {
        let wrapped =
            "Let me think...\n```json\n{\"kind\":\"grant\",\"slots\":{\"to\":\"alice\"}}\n```\n";
        let decision = decide(wrapped).unwrap();
        assert_eq!(decision.intent.kind, "grant");
        let reasoning = decision.reasoning.expect("reasoning should be present");
        assert!(reasoning.contains("think"));
    }

    /// Truncated JSON is reported as a hallucination.
    #[test]
    fn rejects_invalid_json() {
        let outcome = decide(r#"{"kind":"#);
        assert!(matches!(outcome, Err(BrainError::Hallucination(_))));
    }

    /// An object without `kind` is reported as a hallucination.
    #[test]
    fn rejects_missing_kind() {
        let outcome = decide(r#"{"action":"test"}"#);
        assert!(matches!(outcome, Err(BrainError::Hallucination(_))));
    }

    /// `slots` may be omitted entirely.
    #[test]
    fn handles_empty_slots() {
        let decision = decide(r#"{"kind":"noop"}"#).unwrap();
        assert_eq!(decision.intent.kind, "noop");
    }

    /// Slot values may themselves be nested objects.
    #[test]
    fn handles_nested_slots() {
        let decision = decide(r#"{"kind":"policy","slots":{"rules":{"max":500}}}"#).unwrap();
        assert_eq!(decision.intent.kind, "policy");
        assert!(decision.intent.slots.contains_key("rules"));
    }
}