zeph_core/quality/
parser.rs1use std::time::Duration;
10
11use serde::de::DeserializeOwned;
12use thiserror::Error;
13use zeph_llm::any::AnyProvider;
14use zeph_llm::provider::{LlmProvider, Message, MessageMetadata, Role};
15
16#[derive(Debug, Error)]
18pub enum ParseError {
19 #[error("no opening brace found in output")]
20 NoBraceSpan,
21 #[error("JSON parse failed: {0}")]
22 Json(#[from] serde_json::Error),
23}
24
25#[derive(Debug, Error)]
27pub enum ChatJsonError {
28 #[error("LLM error: {0}")]
29 Llm(#[from] zeph_llm::LlmError),
30 #[error("timed out after {0}ms")]
31 Timeout(u64),
32 #[error("failed to parse JSON after 2 attempts; last raw (truncated): {0}")]
33 Parse(String),
34}
35
/// Removes a surrounding Markdown code fence from `raw`, if there is one.
///
/// The input is trimmed first. When it starts with a triple backtick, the rest
/// of that first line (the language tag, e.g. `json`) is dropped, everything
/// from the last triple backtick onwards is cut off, and the remainder is
/// trimmed. Input that does not start with a fence is returned trimmed.
fn strip_fences(raw: &str) -> &str {
    let text = raw.trim();
    let Some(fenced) = text.strip_prefix("```") else {
        return text;
    };

    // Skip the opening line (language tag); keep everything if there is no newline.
    let body = fenced
        .split_once('\n')
        .map_or(fenced, |(_lang, tail)| tail);

    // Cut at the closing fence when present; tolerate an unterminated block.
    let body = body.rfind("```").map_or(body, |end| &body[..end]);

    body.trim()
}
52
/// Returns the first balanced JSON object or array embedded in `s`.
///
/// Scanning starts at the first `{` or `[`. Only the bracket kind that opened
/// the span is depth-counted. Brackets that appear inside JSON string literals
/// are ignored (backslash escapes are honoured), so a value such as
/// `{"a":"}"}` is returned whole instead of being cut at the quoted `}`.
///
/// Returns `None` when `s` has no opener or the opener is never closed.
fn find_first_brace_span(s: &str) -> Option<&str> {
    let open = s.find(['{', '['])?;
    let bytes = s.as_bytes();
    let opener = bytes[open];
    let closer = if opener == b'{' { b'}' } else { b']' };

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Byte-wise scanning is safe: every byte we compare against is ASCII, and
    // UTF-8 continuation bytes never collide with ASCII values.
    for (i, &b) in bytes.iter().enumerate().skip(open) {
        if in_string {
            if escaped {
                // The byte after a backslash is literal, whatever it is.
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == b'"' {
                in_string = false;
            }
            continue;
        }

        if b == b'"' {
            in_string = true;
        } else if b == opener {
            depth += 1;
        } else if b == closer {
            // `depth` is at least 1 here: the scan starts on the opener and
            // returns as soon as the count drops back to zero.
            depth -= 1;
            if depth == 0 {
                return Some(&s[open..=i]);
            }
        }
    }

    None
}
75
76pub fn parse_json<T: DeserializeOwned>(raw: &str) -> Result<T, ParseError> {
82 let stripped = strip_fences(raw);
83 let span = find_first_brace_span(stripped).ok_or(ParseError::NoBraceSpan)?;
84 Ok(serde_json::from_str(span)?)
85}
86
87fn build_messages(system: &str, user: &str) -> Vec<Message> {
89 vec![
90 Message {
91 role: Role::System,
92 content: system.to_owned(),
93 parts: vec![],
94 metadata: MessageMetadata::default(),
95 },
96 Message {
97 role: Role::User,
98 content: user.to_owned(),
99 parts: vec![],
100 metadata: MessageMetadata::default(),
101 },
102 ]
103}
104
/// Estimates a token count for `s` as its UTF-8 byte length divided by four,
/// rounded up. An empty string yields `0`.
#[must_use]
pub fn approx_tokens(s: &str) -> u64 {
    let byte_len = s.len() as u64;
    // Adding 3 before the integer division rounds up; saturate so the
    // addition itself can never wrap.
    byte_len.saturating_add(3) / 4
}
110
/// Converts `d` to whole milliseconds, clamping to `u64::MAX` when the value
/// does not fit in a `u64`.
fn timeout_ms(d: Duration) -> u64 {
    match u64::try_from(d.as_millis()) {
        Ok(millis) => millis,
        Err(_) => u64::MAX,
    }
}
115
116pub async fn chat_json<T: DeserializeOwned>(
124 provider: &AnyProvider,
125 system: &str,
126 user: &str,
127 per_call_timeout: Duration,
128) -> Result<(T, u64, u32), ChatJsonError> {
129 let msgs = build_messages(system, user);
130
131 let first = tokio::time::timeout(per_call_timeout, provider.chat(&msgs)).await;
133 match first {
134 Ok(Ok(raw)) => {
135 if let Ok(v) = parse_json::<T>(&raw) {
136 return Ok((v, approx_tokens(&raw), 1));
137 }
138 let retry_user = format!(
140 "{user}\n\nPrevious output was not valid JSON. \
141 Re-output strict JSON only, no prose, no fences."
142 );
143 let retry_msgs = build_messages(system, &retry_user);
144 let second = tokio::time::timeout(per_call_timeout, provider.chat(&retry_msgs)).await;
145 match second {
146 Ok(Ok(raw2)) => parse_json::<T>(&raw2)
147 .map(|v| (v, approx_tokens(&raw2), 2))
148 .map_err(|_| {
149 let truncated = if raw2.len() > 4096 {
150 let end = raw2.floor_char_boundary(4096);
151 format!("{}…", &raw2[..end])
152 } else {
153 raw2.clone()
154 };
155 ChatJsonError::Parse(truncated)
156 }),
157 Ok(Err(e)) => Err(ChatJsonError::Llm(e)),
158 Err(_) => Err(ChatJsonError::Timeout(timeout_ms(per_call_timeout))),
159 }
160 }
161 Ok(Err(e)) => Err(ChatJsonError::Llm(e)),
162 Err(_) => Err(ChatJsonError::Timeout(timeout_ms(per_call_timeout))),
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` into a dynamic JSON value, panicking if parsing fails.
    fn parse_value(raw: &str) -> serde_json::Value {
        parse_json(raw).unwrap()
    }

    #[test]
    fn strips_json_markdown_fences() {
        let parsed = parse_value("```json\n{\"a\":1}\n```");
        assert_eq!(parsed["a"], 1);
    }

    #[test]
    fn strips_plain_fences() {
        let parsed = parse_value("```\n{\"a\":2}\n```");
        assert_eq!(parsed["a"], 2);
    }

    #[test]
    fn finds_brace_span_in_prose() {
        let parsed = parse_value("Here is the JSON: {\"x\":42} as requested.");
        assert_eq!(parsed["x"], 42);
    }

    #[test]
    fn returns_error_on_no_brace() {
        let outcome = parse_json::<serde_json::Value>("no json here");
        assert!(matches!(outcome, Err(ParseError::NoBraceSpan)));
    }

    #[test]
    fn handles_nested_braces() {
        let parsed = parse_value(r#"{"outer":{"inner":1}}"#);
        assert_eq!(parsed["outer"]["inner"], 1);
    }
}