zeph_core/quality/
parser.rs1use std::time::Duration;
10
11use serde::de::DeserializeOwned;
12use thiserror::Error;
13use zeph_llm::any::AnyProvider;
14use zeph_llm::provider::{LlmProvider, Message, MessageMetadata, Role};
15
16#[non_exhaustive]
17#[derive(Debug, Error)]
19pub enum ParseError {
20 #[error("no opening brace found in output")]
21 NoBraceSpan,
22 #[error("JSON parse failed: {0}")]
23 Json(#[from] serde_json::Error),
24}
25
26#[non_exhaustive]
27#[derive(Debug, Error)]
29pub enum ChatJsonError {
30 #[error("LLM error: {0}")]
31 Llm(#[from] zeph_llm::LlmError),
32 #[error("timed out after {0}ms")]
33 Timeout(u64),
34 #[error("failed to parse JSON after 2 attempts; last raw (truncated): {0}")]
35 Parse(String),
36}
37
/// Removes a surrounding Markdown code fence from `raw`, if there is one.
///
/// The input is trimmed first. When the trimmed text starts with a triple
/// backtick, the remainder of that opening line (e.g. a `json` language tag)
/// is dropped, everything from the last triple backtick onwards is cut off,
/// and the result is trimmed again. Text that does not start with a fence is
/// returned trimmed but otherwise untouched.
fn strip_fences(raw: &str) -> &str {
    let text = raw.trim();
    let Some(after_open) = text.strip_prefix("```") else {
        return text;
    };

    // Skip the rest of the opening fence line; without a newline there is no
    // separate tag line, so keep everything after the backticks.
    let body = match after_open.split_once('\n') {
        Some((_tag, tail)) => tail,
        None => after_open,
    };

    // A missing closing fence is tolerated: keep whatever follows the opener.
    match body.rfind("```") {
        Some(close) => body[..close].trim(),
        None => body.trim(),
    }
}
54
/// Returns the first balanced `{…}` or `[…]` span in `s`, or `None` if there
/// is no opener or the opener is never closed.
///
/// The span starts at the first `{` or `[` and ends at the matching closer of
/// the same kind. Only the delimiter kind that opened the span is counted for
/// nesting. Delimiters that appear inside double-quoted string literals are
/// ignored (honouring backslash escapes), so a value such as `{"a":"}"}` is
/// returned whole instead of being cut at the brace inside the string.
fn find_first_brace_span(s: &str) -> Option<&str> {
    let open = s.find(['{', '['])?;
    let bytes = s.as_bytes();
    let opener = bytes[open];
    let closer = if opener == b'{' { b'}' } else { b']' };

    // Scanning bytes is sound here: every byte we test for is ASCII, and ASCII
    // bytes never occur inside a multi-byte UTF-8 sequence.
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    for (i, &b) in bytes.iter().enumerate().skip(open) {
        if in_string {
            if escaped {
                // The byte after a backslash is consumed verbatim.
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == b'"' {
                in_string = false;
            }
            continue;
        }

        if b == b'"' {
            in_string = true;
        } else if b == opener {
            depth += 1;
        } else if b == closer {
            // `depth` is at least 1 here: the scan starts on the opener and
            // returns as soon as the count drops back to zero.
            depth -= 1;
            if depth == 0 {
                // Both ends are ASCII bytes, so the slice is on char boundaries.
                return Some(&s[open..=i]);
            }
        }
    }

    None
}
77
78pub fn parse_json<T: DeserializeOwned>(raw: &str) -> Result<T, ParseError> {
84 let stripped = strip_fences(raw);
85 let span = find_first_brace_span(stripped).ok_or(ParseError::NoBraceSpan)?;
86 Ok(serde_json::from_str(span)?)
87}
88
89fn build_messages(system: &str, user: &str) -> Vec<Message> {
91 vec![
92 Message {
93 role: Role::System,
94 content: system.to_owned(),
95 parts: vec![],
96 metadata: MessageMetadata::default(),
97 },
98 Message {
99 role: Role::User,
100 content: user.to_owned(),
101 parts: vec![],
102 metadata: MessageMetadata::default(),
103 },
104 ]
105}
106
/// Rough token estimate for `s`: its length in bytes divided by four,
/// rounded up.
///
/// This is a byte-count heuristic, not a tokenizer; the empty string yields 0.
#[must_use]
pub fn approx_tokens(s: &str) -> u64 {
    let byte_len = s.len() as u64;
    // Adding 3 before the integer division rounds up; saturating keeps the
    // addition from wrapping at the top of the range.
    byte_len.saturating_add(3) / 4
}
112
/// Converts `d` to whole milliseconds, clamping to `u64::MAX` when the value
/// does not fit in a `u64`.
fn timeout_ms(d: Duration) -> u64 {
    // `as_millis` yields a `u128`; fall back to the maximum rather than wrap.
    d.as_millis().try_into().unwrap_or(u64::MAX)
}
117
118pub async fn chat_json<T: DeserializeOwned>(
126 provider: &AnyProvider,
127 system: &str,
128 user: &str,
129 per_call_timeout: Duration,
130) -> Result<(T, u64, u32), ChatJsonError> {
131 let msgs = build_messages(system, user);
132
133 let first = tokio::time::timeout(per_call_timeout, provider.chat(&msgs)).await;
135 match first {
136 Ok(Ok(raw)) => {
137 if let Ok(v) = parse_json::<T>(&raw) {
138 return Ok((v, approx_tokens(&raw), 1));
139 }
140 let retry_user = format!(
142 "{user}\n\nPrevious output was not valid JSON. \
143 Re-output strict JSON only, no prose, no fences."
144 );
145 let retry_msgs = build_messages(system, &retry_user);
146 let second = tokio::time::timeout(per_call_timeout, provider.chat(&retry_msgs)).await;
147 match second {
148 Ok(Ok(raw2)) => parse_json::<T>(&raw2)
149 .map(|v| (v, approx_tokens(&raw2), 2))
150 .map_err(|_| {
151 let truncated = if raw2.len() > 4096 {
152 let end = raw2.floor_char_boundary(4096);
153 format!("{}…", &raw2[..end])
154 } else {
155 raw2.clone()
156 };
157 ChatJsonError::Parse(truncated)
158 }),
159 Ok(Err(e)) => Err(ChatJsonError::Llm(e)),
160 Err(_) => Err(ChatJsonError::Timeout(timeout_ms(per_call_timeout))),
161 }
162 }
163 Ok(Err(e)) => Err(ChatJsonError::Llm(e)),
164 Err(_) => Err(ChatJsonError::Timeout(timeout_ms(per_call_timeout))),
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::Value;

    /// Parses `raw` into a generic JSON value, panicking if parsing fails.
    fn parsed(raw: &str) -> Value {
        parse_json(raw).unwrap()
    }

    // A fence with a language tag is removed before parsing.
    #[test]
    fn strips_json_markdown_fences() {
        let value = parsed("```json\n{\"a\":1}\n```");
        assert_eq!(value["a"], 1);
    }

    // A fence without a language tag is removed as well.
    #[test]
    fn strips_plain_fences() {
        let value = parsed("```\n{\"a\":2}\n```");
        assert_eq!(value["a"], 2);
    }

    // Prose around the JSON object is ignored.
    #[test]
    fn finds_brace_span_in_prose() {
        let value = parsed("Here is the JSON: {\"x\":42} as requested.");
        assert_eq!(value["x"], 42);
    }

    // Text without any opener reports `NoBraceSpan`.
    #[test]
    fn returns_error_on_no_brace() {
        let outcome = parse_json::<Value>("no json here");
        assert!(matches!(outcome, Err(ParseError::NoBraceSpan)));
    }

    // Nested objects are matched up to the outermost closing brace.
    #[test]
    fn handles_nested_braces() {
        let value = parsed(r#"{"outer":{"inner":1}}"#);
        assert_eq!(value["outer"]["inner"], 1);
    }
}