use crate::error::{LlmError, LlmResult};
use crate::logging::{log_debug, log_warn};
use serde_json::Value;
/// Field-less namespace type: its associated functions turn raw LLM output
/// text into a validated `serde_json::Value` object.
pub struct ResponseParser;
impl ResponseParser {
/// Parses raw LLM output into a structured JSON object.
///
/// Strategies are tried in order, stopping at the first text that parses as JSON:
/// 1. parse `raw` exactly as given;
/// 2. parse the output of `clean_artifacts` (only when cleaning changed the text);
/// 3. parse the first brace-balanced `{...}` span found in the cleaned text.
///
/// Whatever parses first is handed to `validate_and_return`, and that verdict is
/// final: a rejected value does not fall through to the later strategies.
///
/// # Errors
/// Returns a response-parsing error when validation rejects the parsed value, or
/// when no strategy yields parseable JSON. In the latter case the message embeds
/// the first 200 characters of `raw`, followed by `...` if `raw` was longer.
pub fn parse_llm_output(raw: &str) -> LlmResult<Value> {
    // Maximum number of characters of `raw` echoed into logs and error messages.
    const PREVIEW_CHARS: usize = 200;

    log_debug!(
        content_length = raw.len(),
        content_preview = raw.chars().take(PREVIEW_CHARS).collect::<String>(),
        "Parsing LLM output for structured JSON"
    );

    // Strategy 1: the output is already clean JSON.
    if let Ok(structured) = serde_json::from_str::<Value>(raw) {
        log_debug!("Successfully parsed JSON directly");
        return Self::validate_and_return(structured);
    }

    // Strategy 2: strip chat/markdown artifacts and retry. Skipped when cleaning
    // was a no-op, because the identical text has just failed to parse.
    let cleaned = Self::clean_artifacts(raw);
    if cleaned != raw {
        log_debug!(
            original_length = raw.len(),
            cleaned_length = cleaned.len(),
            "Cleaned artifacts from LLM response"
        );
        if let Ok(structured) = serde_json::from_str::<Value>(&cleaned) {
            log_debug!("Successfully parsed JSON after artifact cleaning");
            return Self::validate_and_return(structured);
        }
    }

    // Strategy 3: the JSON object is embedded in surrounding prose.
    if let Some(json_str) = Self::extract_json_object(&cleaned) {
        log_debug!(
            extracted_length = json_str.len(),
            "Extracted JSON object from mixed content"
        );
        if let Ok(structured) = serde_json::from_str::<Value>(&json_str) {
            log_debug!("Successfully parsed JSON after extraction");
            return Self::validate_and_return(structured);
        }
    }

    let preview = raw.chars().take(PREVIEW_CHARS).collect::<String>();
    // The preview is cut by character count, so truncation must be detected the
    // same way: comparing the byte length would append "..." to short multi-byte
    // input that was not actually truncated.
    let ellipsis = if raw.chars().nth(PREVIEW_CHARS).is_some() {
        "..."
    } else {
        ""
    };
    log_warn!(
        content_preview = preview,
        "Failed to parse structured response from LLM output"
    );
    Err(LlmError::response_parsing_error(format!(
        "Could not parse structured JSON response from: {}{}",
        preview, ellipsis
    )))
}
/// Accepts `response` only if it is a JSON object with at least one member.
///
/// # Errors
/// Returns a response-parsing error when the value is not an object, or when it
/// is an object with no members.
fn validate_and_return(response: Value) -> LlmResult<Value> {
    // Classify first, then act, so the borrow taken by `as_object` has ended
    // before `response` is moved into the `Ok` result.
    let rejection = match response.as_object() {
        None => Some("Structured response must be a JSON object"),
        Some(members) if members.is_empty() => {
            Some("Structured response cannot be empty object")
        }
        Some(_) => None,
    };

    match rejection {
        Some(message) => Err(LlmError::response_parsing_error(message.to_string())),
        None => Ok(response),
    }
}
/// Strips known wrapper artifacts from LLM output.
///
/// Removes every occurrence of the chat-template markers and markdown code-fence
/// tokens listed below, trims surrounding whitespace, then drops control
/// characters that are not whitespace. The before/after byte lengths are logged
/// at debug level.
fn clean_artifacts(content: &str) -> String {
    // Removed one after another in exactly this order; the language-tagged fences
    // come before the bare fence so the "json"/"JSON" tag is removed with them.
    const ARTIFACT_TOKENS: [&str; 6] = [
        "<|channel|>",
        "```json",
        "```JSON",
        "```",
        "<|end|>",
        "<|start|>",
    ];

    let without_tokens = ARTIFACT_TOKENS
        .iter()
        .fold(content.to_string(), |text, token| text.replace(*token, ""));

    // Keep whitespace control characters (tabs, newlines) and discard the rest.
    let mut cleaned = String::with_capacity(without_tokens.len());
    for ch in without_tokens.trim().chars() {
        if ch.is_whitespace() || !ch.is_control() {
            cleaned.push(ch);
        }
    }

    log_debug!(
        original_length = content.len(),
        cleaned_length = cleaned.len(),
        "Cleaned LLM response artifacts"
    );
    cleaned
}
/// Returns the brace-balanced `{...}` span that starts at the first `{` in
/// `content`, or `None` when there is no `{` or that brace is never closed.
///
/// Only the first `{` is considered; the returned text is not checked for JSON
/// validity here.
fn extract_json_object(content: &str) -> Option<String> {
    let brace_pos = content.find('{')?;
    let (object_text, _consumed) = Self::extract_balanced_json(&content[brace_pos..])?;
    Some(object_text)
}
/// Extracts a brace-balanced object from the start of `text`, ignoring leading
/// whitespace.
///
/// Returns the object text together with the number of bytes of `text` consumed
/// (skipped leading whitespace plus the object itself). Returns `None` when the
/// first non-whitespace character is not `{` or the braces never balance.
fn extract_balanced_json(text: &str) -> Option<(String, usize)> {
    let candidate = text.trim_start();
    let skipped_bytes = text.len() - candidate.len();

    if candidate.chars().next() != Some('{') {
        return None;
    }

    // The scanner reports a character index, so work on a char buffer and convert
    // back to a byte length through the rebuilt string.
    let symbols: Vec<char> = candidate.chars().collect();
    let last = Self::find_balanced_json_end(&symbols)?;
    let object_text: String = symbols[..=last].iter().collect();
    let consumed_bytes = skipped_bytes + object_text.len();

    Some((object_text, consumed_bytes))
}
/// Finds the index (into `chars`) of the `}` that closes the first `{`.
///
/// Braces inside double-quoted strings are ignored, and a backslash inside a
/// string escapes the character that follows it, so `\"` does not end the string.
///
/// Returns `None` when the braces never balance, and also when a `}` appears
/// with no `{` open: such input cannot form a balanced span, and counting it
/// would let a later `{` bring the depth back to zero and report a bogus end.
fn find_balanced_json_end(chars: &[char]) -> Option<usize> {
    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escaped = false;

    for (idx, &ch) in chars.iter().enumerate() {
        if in_string {
            if escaped {
                // This character was escaped by the preceding backslash.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        match ch {
            '"' => in_string = true,
            '{' => depth += 1,
            '}' => {
                // An unmatched closer makes `checked_sub` fail, which ends the scan.
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}
}