use anyhow::Result;
use serde_json::Value as JsonValue;
/// Rewrites typographic quotes so the text has a better chance of parsing as JSON.
///
/// The scan tracks whether it is inside a straight-quoted (`"`) string:
///
/// * inside a string, curly double quotes become `\"`, curly single quotes
///   become `'`, and a literal newline, carriage return or tab becomes the
///   matching two-character escape;
/// * outside a string, curly double quotes become `"` and curly single quotes
///   become `'`.
///
/// A backslash inside a string copies the following character through
/// untouched, so sequences that are already escaped are preserved.
fn normalize_quotes(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut inside = false;
    let mut pending_escape = false;

    for ch in text.chars() {
        // The character right after a backslash is copied verbatim.
        if pending_escape {
            pending_escape = false;
            out.push(ch);
            continue;
        }

        match ch {
            '\\' if inside => {
                pending_escape = true;
                out.push(ch);
            }
            '"' => {
                inside = !inside;
                out.push(ch);
            }
            // A curly double quote is string content inside a string and a
            // delimiter outside of one. Note that it never toggles `inside`.
            '\u{201C}' | '\u{201D}' => out.push_str(if inside { "\\\"" } else { "\"" }),
            '\u{2018}' | '\u{2019}' => out.push('\''),
            '\n' if inside => out.push_str("\\n"),
            '\r' if inside => out.push_str("\\r"),
            '\t' if inside => out.push_str("\\t"),
            other => out.push(other),
        }
    }

    out
}
pub fn try_parse_partial_json(partial_json: &str) -> Result<Option<JsonValue>> {
let trimmed = partial_json.trim();
if trimmed.is_empty() {
return Ok(None);
}
if let Ok(value) = serde_json::from_str::<JsonValue>(trimmed) {
return Ok(Some(value));
}
let normalized = normalize_quotes(trimmed);
if let Ok(value) = serde_json::from_str::<JsonValue>(&normalized) {
return Ok(Some(value));
}
let escaped = escape_control_chars_in_strings(&normalized);
if let Ok(value) = serde_json::from_str::<JsonValue>(&escaped) {
return Ok(Some(value));
}
let extracted = extract_from_markdown(trimmed);
if extracted != trimmed {
if let Ok(value) = serde_json::from_str::<JsonValue>(&extracted) {
return Ok(Some(value));
}
let normalized_extracted = normalize_quotes(&extracted);
if let Ok(value) = serde_json::from_str::<JsonValue>(&normalized_extracted) {
return Ok(Some(value));
}
let escaped_extracted = escape_control_chars_in_strings(&normalized_extracted);
if let Ok(value) = serde_json::from_str::<JsonValue>(&escaped_extracted) {
return Ok(Some(value));
}
let attempts_extracted = generate_completion_attempts(&escaped_extracted);
for attempt in attempts_extracted {
if let Ok(value) = serde_json::from_str::<JsonValue>(&attempt) {
return Ok(Some(value));
}
}
}
let attempts = generate_completion_attempts(&escaped);
for attempt in attempts {
if let Ok(value) = serde_json::from_str::<JsonValue>(&attempt) {
return Ok(Some(value));
}
}
Ok(None)
}
/// Pulls the most JSON-looking span out of `text`.
///
/// Strategies, in order:
///
/// 1. the body of a fence opened with "```json" (matched ASCII
///    case-insensitively), up to the closing fence or the end of the text;
/// 2. the body of the first plain "```" fence, but only when its trimmed
///    content starts with `{` or `[`;
/// 3. the span from the first `{` to the last `}` (or to the end of the text
///    when no later `}` exists);
/// 4. the same for `[` and `]`;
/// 5. otherwise `text` unchanged.
fn extract_from_markdown(text: &str) -> String {
    const JSON_FENCE: &str = "```json";
    const FENCE: &str = "```";

    // ASCII-only lowercasing keeps every byte offset aligned with `text`.
    // Full Unicode lowercasing can change a character's encoded length, which
    // would make offsets found in the lowered copy invalid for slicing `text`.
    let text_lower = text.to_ascii_lowercase();
    if let Some(start) = text_lower.find(JSON_FENCE) {
        let body = &text[start + JSON_FENCE.len()..];
        let body = body.find(FENCE).map_or(body, |end| &body[..end]);
        return body.trim().to_string();
    }

    if let Some(start) = text.find(FENCE) {
        let body = &text[start + FENCE.len()..];
        let content = body.find(FENCE).map_or(body, |end| &body[..end]).trim();
        if content.starts_with('{') || content.starts_with('[') {
            return content.to_string();
        }
    }

    // No usable fence: fall back to the outermost object, then array, span.
    for (open, close) in [('{', '}'), ('[', ']')] {
        if let Some(start) = text.find(open) {
            return match text.rfind(close) {
                Some(end) if end > start => text[start..=end].to_string(),
                // Unterminated: keep everything from the opener onwards.
                _ => text[start..].to_string(),
            };
        }
    }

    text.to_string()
}
/// Escapes raw control characters that appear inside JSON string literals.
///
/// JSON does not allow unescaped characters in the range U+0000..=U+001F
/// inside a string. Within straight-quoted strings this function rewrites
/// newline, carriage return, tab, backspace and form feed to their short
/// escapes (`\n`, `\r`, `\t`, `\b`, `\f`) and every other control character
/// in that range to `\u00XX`. Text outside strings is copied unchanged, and a
/// backslash inside a string copies the following character through as-is so
/// existing escapes are not doubled.
fn escape_control_chars_in_strings(json: &str) -> String {
    let mut result = String::with_capacity(json.len());
    let mut in_string = false;
    let mut escape_next = false;

    for c in json.chars() {
        // The character right after a backslash belongs to an existing escape.
        if escape_next {
            result.push(c);
            escape_next = false;
            continue;
        }

        match c {
            '\\' if in_string => {
                escape_next = true;
                result.push(c);
            }
            '"' => {
                in_string = !in_string;
                result.push(c);
            }
            '\n' if in_string => result.push_str("\\n"),
            '\r' if in_string => result.push_str("\\r"),
            '\t' if in_string => result.push_str("\\t"),
            '\x08' if in_string => result.push_str("\\b"),
            '\x0C' if in_string => result.push_str("\\f"),
            // Remaining control characters have no short form in JSON.
            ctrl if in_string && ctrl < '\u{20}' => {
                result.push_str(&format!("\\u{:04x}", ctrl as u32));
            }
            other => result.push(other),
        }
    }

    result
}
/// Returns the closing delimiters needed to balance `json`, innermost first.
///
/// Braces and brackets inside string literals are ignored, and a backslash
/// inside a string shields the character that follows it. Openers are kept on
/// a stack so the result respects nesting: `[{` yields `}]`, not `]}`. A
/// closer that does not match the innermost open delimiter is left alone; such
/// text cannot be repaired by appending closers anyway.
fn closing_delimiters(json: &str) -> String {
    let mut pending_closers: Vec<char> = Vec::new();
    let mut in_string = false;
    let mut escape_next = false;

    for c in json.chars() {
        if escape_next {
            escape_next = false;
            continue;
        }
        match c {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            _ if in_string => {}
            '{' => pending_closers.push('}'),
            '[' => pending_closers.push(']'),
            '}' | ']' => {
                if pending_closers.last() == Some(&c) {
                    pending_closers.pop();
                }
            }
            _ => {}
        }
    }

    pending_closers.into_iter().rev().collect()
}

/// Builds candidate completions for truncated JSON, most faithful first.
///
/// The input is trimmed, then up to three candidates are produced:
///
/// 1. the text with an open string terminated and every open container closed;
/// 2. the same, after first appending `null` when the text ends with `:` or
///    dropping trailing commas when it ends with `,`;
/// 3. when the text contains a comma: everything before the last comma, with
///    the containers still open *at that point* closed. This drops a trailing
///    element that is too incomplete to repair.
///
/// Candidates are not validated here; the caller is expected to try parsing
/// each one in order.
fn generate_completion_attempts(json: &str) -> Vec<String> {
    let json = json.trim();
    // Appending `"`, `null`, or removing trailing commas does not open or
    // close any container, so one scan serves the first two candidates.
    let closers = closing_delimiters(json);
    let mut attempts = Vec::with_capacity(3);

    let mut completion = json.to_string();
    if has_incomplete_string(&completion) {
        completion.push('"');
    }
    completion.push_str(&closers);
    attempts.push(completion);

    let mut aggressive = json.trim_end_matches(',').to_string();
    if json.ends_with(':') {
        aggressive.push_str("null");
    }
    if has_incomplete_string(&aggressive) {
        aggressive.push('"');
    }
    aggressive.push_str(&closers);
    attempts.push(aggressive);

    if let Some(last_comma) = json.rfind(',') {
        let kept = json[..=last_comma].trim_end_matches(',');
        let mut truncated = kept.to_string();
        // Rescan the shortened text: containers opened after the comma are
        // gone, so the closers computed for the full text would over-close.
        truncated.push_str(&closing_delimiters(kept));
        attempts.push(truncated);
    }

    attempts
}

/// Reports whether `json` ends inside an unterminated string literal.
///
/// Every unescaped `"` toggles the string state; a backslash inside a string
/// shields the character that follows it, so `\"` does not end the string.
fn has_incomplete_string(json: &str) -> bool {
    let mut in_string = false;
    let mut escape_next = false;

    for c in json.chars() {
        if escape_next {
            escape_next = false;
            continue;
        }
        match c {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            _ => {}
        }
    }

    in_string
}
// Unit tests for the lenient JSON parser. Most tests go through
// `try_parse_partial_json`; a few call `normalize_quotes` and
// `escape_control_chars_in_strings` directly.
#[cfg(test)]
mod tests {
use super::*;
// --- Well-formed and truncated input ---
#[test]
fn test_complete_json() {
let json = r#"{"name": "John", "age": 30}"#;
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "John");
assert_eq!(value["age"], 30);
}
// Missing closing brace.
#[test]
fn test_incomplete_object() {
let partial = r#"{"name": "John", "age": 30"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "John");
assert_eq!(value["age"], 30);
}
// Input ends in the middle of a string value.
#[test]
fn test_incomplete_string() {
let partial = r#"{"name": "Joh"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "Joh");
}
// Missing closing bracket and brace.
#[test]
fn test_incomplete_array() {
let partial = r#"{"items": [1, 2, 3"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["items"].as_array().unwrap().len(), 3);
}
#[test]
fn test_nested_incomplete() {
let partial = r#"{"person": {"name": "John", "age": 30"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["person"]["name"], "John");
assert_eq!(value["person"]["age"], 30);
}
// --- Markdown code fences ---
// Fenced block preceded by prose; the object inside is itself unterminated.
#[test]
fn test_markdown_extraction() {
let partial = r#"Here's the data:
```json
{"name": "John", "age": 30
```"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
}
// Fence is opened but never closed.
#[test]
fn test_markdown_incomplete() {
let partial = r#"```json
{"name": "John", "age": 30"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
}
#[test]
fn test_trailing_comma() {
let partial = r#"{"name": "John", "age": 30,"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "John");
}
#[test]
fn test_empty_input() {
let result = try_parse_partial_json("").unwrap();
assert!(result.is_none());
}
// Only checks that the call returns `Ok`; the parsed value is not inspected.
#[test]
fn test_incomplete_field_name() {
let partial = r#"{"name": "John", "ag"#;
let _result = try_parse_partial_json(partial).unwrap();
}
// The fence language tag is matched regardless of letter case.
#[test]
fn test_uppercase_json_code_fence() {
let partial = r#"```JSON
{"name": "John", "age": 30}
```"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "John");
assert_eq!(value["age"], 30);
}
#[test]
fn test_mixed_case_json_code_fence() {
let partial = r#"```Json
{"name": "Alice"}
```"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "Alice");
}
#[test]
fn test_uppercase_json_code_fence_incomplete() {
let partial = r#"```JSON
{"name": "John", "age": 30"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "John");
}
// --- Braces and brackets that are string content, not structure ---
#[test]
fn test_braces_inside_string_value() {
let partial = r#"{"text": "use { for scope"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["text"], "use { for scope");
}
#[test]
fn test_brackets_inside_string_value() {
let partial = r#"{"code": "arr = [1, 2, 3]"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["code"], "arr = [1, 2, 3]");
}
#[test]
fn test_mixed_braces_brackets_in_string() {
let partial = r#"{"message": "JSON: {\"arr\": [1, 2]}"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["message"], r#"JSON: {"arr": [1, 2]}"#);
}
#[test]
fn test_complete_json_with_braces_in_string() {
let json = r#"{"text": "use { for scope and } to close", "count": 5}"#;
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["text"], "use { for scope and } to close");
assert_eq!(value["count"], 5);
}
#[test]
fn test_escaped_quotes_with_braces() {
let partial = r#"{"code": "fn main() { println!(\"hello\"); }"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["code"], "fn main() { println!(\"hello\"); }");
}
// --- Deep nesting ---
#[test]
fn test_deeply_nested_partial_json_3_levels() {
let partial = r#"{"level1": {"level2": {"level3": {"name": "deep""#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["level1"]["level2"]["level3"]["name"], "deep");
}
#[test]
fn test_deeply_nested_partial_json_4_levels() {
let partial = r#"{"a": {"b": {"c": {"d": {"value": 42"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["a"]["b"]["c"]["d"]["value"], 42);
}
// Only the first array element is asserted; the second one is cut off
// mid-string in the input.
#[test]
fn test_deeply_nested_with_arrays() {
let partial = r#"{"data": {"items": [{"name": "first"}, {"name": "second"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["data"]["items"][0]["name"], "first");
}
#[test]
fn test_deeply_nested_complete() {
let json = r#"{"a": {"b": {"c": {"d": {"e": "five"}}}}}"#;
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["a"]["b"]["c"]["d"]["e"], "five");
}
// --- `\uXXXX` escape sequences ---
#[test]
fn test_unicode_escape_sequences_complete() {
let json = r#"{"name": "\u0048\u0065\u006c\u006c\u006f"}"#;
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "Hello");
}
#[test]
fn test_unicode_escape_sequences_partial() {
let partial = r#"{"name": "\u0048\u0065\u006c\u006c\u006f""#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["name"], "Hello");
}
// A `\u` escape cut off before its fourth hex digit is expected to yield
// `None` rather than a guessed value.
#[test]
fn test_unicode_escape_incomplete_sequence() {
let partial = r#"{"name": "\u004"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_none());
}
#[test]
fn test_unicode_escape_with_other_fields() {
let partial = r#"{"greeting": "\u0048\u0069", "count": 42"#;
let result = try_parse_partial_json(partial).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["greeting"], "Hi");
assert_eq!(value["count"], 42);
}
#[test]
fn test_mixed_unicode_and_regular_text() {
let json = r#"{"text": "Say \u0048\u0069 to everyone"}"#;
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["text"], "Say Hi to everyone");
}
// --- Raw control characters inside string values ---
// These inputs use ordinary (non-raw) Rust literals, so `\n`, `\t` and `\r`
// below are real control characters in the text handed to the parser.
#[test]
fn test_literal_newline_in_string() {
let json = "{\"message\": \"Hello\nWorld\"}";
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["message"], "Hello\nWorld");
}
#[test]
fn test_literal_tab_in_string() {
let json = "{\"message\": \"Hello\tWorld\"}";
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["message"], "Hello\tWorld");
}
#[test]
fn test_multiple_literal_newlines() {
let json = "{\"message\": \"Line 1\nLine 2\nLine 3\"}";
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["message"], "Line 1\nLine 2\nLine 3");
}
#[test]
fn test_literal_crlf_in_string() {
let json = "{\"message\": \"Hello\r\nWorld\"}";
let result = try_parse_partial_json(json).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert_eq!(value["message"], "Hello\r\nWorld");
}
// An already-escaped sequence must come back unchanged.
#[test]
fn test_escape_control_chars_preserves_escaped() {
let input = r#"{"text": "already escaped\\n here"}"#;
let escaped = escape_control_chars_in_strings(input);
assert_eq!(escaped, input);
}
// A raw newline is escaped while the existing `\n` escape is left alone.
#[test]
fn test_escape_control_chars_handles_mixed() {
let input = "{\"text\": \"literal\nnewline and escaped\\n too\"}";
let escaped = escape_control_chars_in_strings(input);
assert_eq!(escaped, "{\"text\": \"literal\\nnewline and escaped\\n too\"}");
}
// --- Typographic (curly) quotes ---
// Curly quotes used as the string delimiters themselves.
#[test]
fn test_curly_quotes_in_json_structure() {
let input = "{ \u{201C}tool\u{201D}: \u{201C}Bash\u{201D} }";
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse JSON with curly quotes as structure");
let value = result.unwrap();
assert_eq!(value["tool"], "Bash");
}
// Curly quotes that are content of a straight-quoted string.
#[test]
fn test_curly_quotes_inside_string_value() {
let input = "{\"message\": \"blend of \u{201C}cat\u{201D} and \u{201C}ethos\u{201D}\"}";
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse JSON with curly quotes inside strings");
let value = result.unwrap();
let msg = value["message"].as_str().unwrap();
assert!(msg.contains("cat"), "Message should contain 'cat'");
assert!(msg.contains("ethos"), "Message should contain 'ethos'");
}
#[test]
fn test_escaped_quotes_in_string_value() {
let input = r#"{"message": "Your username is \"catethos\". It means \"cat\" + \"ethos\"."}"#;
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse JSON with escaped quotes");
let value = result.unwrap();
let msg = value["message"].as_str().unwrap();
assert!(msg.contains("catethos"), "Message should contain 'catethos'");
assert!(msg.contains("cat"), "Message should contain 'cat'");
}
// Multi-line, pretty-printed payload with many escaped quotes in one value.
#[test]
fn test_final_response_with_escaped_quotes_and_special_chars() {
let input = r#"{
"tool": "FinalResponse",
"message": "Your current username is \"catethos\". It appears to be a playful blend of two words: \"cat\" and \"ethos.\" \"Cat\" evokes the image of a curious, independent feline, while \"ethos\" refers to the characteristic spirit, values, or beliefs of a community or individual."
}"#;
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse FinalResponse with escaped quotes");
let value = result.unwrap();
assert_eq!(value["tool"], "FinalResponse");
let msg = value["message"].as_str().unwrap();
assert!(msg.contains("catethos"));
assert!(msg.contains("cat"));
assert!(msg.contains("ethos"));
}
#[test]
fn test_mixed_curly_and_escaped_quotes() {
let input = "{\"msg\": \"He said \\\"hello\\\" and used \u{201C}emphasis\u{201D} marks\"}";
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse mixed quote styles");
let value = result.unwrap();
let msg = value["msg"].as_str().unwrap();
assert!(msg.contains("hello"));
assert!(msg.contains("emphasis"));
}
#[test]
fn test_curly_quotes_with_newlines_in_string() {
let input = "{\"message\": \"First line\nSecond with \u{201C}quotes\u{201D}\"}";
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse curly quotes with newlines");
let value = result.unwrap();
let msg = value["message"].as_str().unwrap();
assert!(msg.contains("First line"));
assert!(msg.contains("quotes"));
}
#[test]
fn test_normalize_quotes_preserves_already_escaped() {
let input = r#"{"text": "already \"escaped\" here"}"#;
let normalized = normalize_quotes(input);
assert_eq!(normalized, input, "Already escaped quotes should be preserved");
}
// Non-quote typographic characters (here an em dash) must survive parsing.
#[test]
fn test_normalize_quotes_handles_em_dash() {
let input = r#"{"message": "curiosity—much like a cat"}"#;
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some());
let value = result.unwrap();
assert!(value["message"].as_str().unwrap().contains("—"));
}
// --- Valid JSON whose string values contain Markdown fences ---
// The fences are part of the message text and must not be treated as a
// wrapper around the JSON.
#[test]
fn test_json_with_embedded_markdown_code_blocks() {
let input = r#"{"tool":"FinalResponse","message":"Here's a JSON example:\n\n```json\n{\"data\": [1, 2, 3]}\n```\n\nThis shows how to format data."}"#;
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse valid JSON containing embedded markdown");
let value = result.unwrap();
assert_eq!(value["tool"], "FinalResponse");
let msg = value["message"].as_str().unwrap();
assert!(msg.contains("```json"));
assert!(msg.contains("```\n\nThis shows"));
}
#[test]
fn test_json_with_multiple_embedded_code_blocks() {
let input = r#"{"tool":"FinalResponse","message":"Example 1:\n```python\nprint('hello')\n```\n\nExample 2:\n```json\n{\"x\": 1}\n```\nDone."}"#;
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should parse JSON with multiple embedded code blocks");
let value = result.unwrap();
assert_eq!(value["tool"], "FinalResponse");
let msg = value["message"].as_str().unwrap();
assert!(msg.contains("```python"));
assert!(msg.contains("```json"));
}
// Counterpart to the two tests above: a real fence around the JSON still works.
#[test]
fn test_actual_markdown_wrapped_json_still_works() {
let input = "```json\n{\"tool\": \"FinalResponse\", \"message\": \"Hello\"}\n```";
let result = try_parse_partial_json(input).unwrap();
assert!(result.is_some(), "Should extract JSON from actual markdown wrapper");
let value = result.unwrap();
assert_eq!(value["tool"], "FinalResponse");
assert_eq!(value["message"], "Hello");
}
}