Skip to main content

sqlite_graphrag/
json_repair.rs

1//! JSON repair for malformed LLM responses (v1.0.97).
2//!
3//! OpenRouter chat models — notably `deepseek/deepseek-v4-flash:nitro`, which
4//! does not reliably honour `json_schema` strict mode — frequently wrap their
5//! output in markdown code fences, leave trailing commas, or omit quotes around
6//! keys. This module parses such payloads defensively: a strict `serde_json`
7//! pass runs first so well-formed responses pay zero repair cost, and only on
8//! failure does the `llm_json` repair pass (a Rust port of the Python
9//! `json_repair` library) run before a second parse attempt.
10
11use llm_json::{loads, RepairOptions};
12use serde_json::Value;
13
14/// Parse `input` into a [`serde_json::Value`], repairing common LLM JSON
15/// defects when a strict parse fails.
16///
17/// Strategy:
18/// 1. Try `serde_json::from_str` directly — the fast path for valid JSON.
19/// 2. On failure, run `llm_json::loads`, which repairs the string (markdown
20///    fences, trailing commas, unquoted keys, missing brackets) and parses it
21///    to a `Value` in a single pass.
22/// 3. Return an error only when `llm_json` itself fails (an I/O or UTF-8
23///    fault). `llm_json` coerces aggressively — arbitrary text becomes a JSON
24///    string, empty input becomes `{}`, and a lone delimiter becomes `null` —
25///    so callers MUST validate the returned `Value`'s shape rather than
26///    relying on `Err` for semantically-wrong-but-parseable input.
27pub fn repair_to_value(input: &str) -> anyhow::Result<Value> {
28    match serde_json::from_str::<Value>(input) {
29        Ok(value) => Ok(value),
30        Err(strict_err) => loads(input, &RepairOptions::default()).map_err(|repair_err| {
31            anyhow::anyhow!(
32                "failed to parse JSON even after repair: strict error = {strict_err}; \
33                 repair error = {repair_err}"
34            )
35        }),
36    }
37}
38
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed JSON goes through the strict path and keeps its fields.
    #[test]
    fn parses_already_valid_json_unchanged() {
        let parsed = repair_to_value(r#"{"name":"qwen","dim":384}"#).unwrap();

        assert_eq!(parsed["name"].as_str(), Some("qwen"));
        assert_eq!(parsed["dim"].as_i64(), Some(384));
    }

    /// Single-quoted strings, bare keys and a dangling comma are all repaired.
    #[test]
    fn repairs_unquoted_keys_and_trailing_comma() {
        let sloppy = r#"{name: 'John', age: 30,}"#;
        let parsed = repair_to_value(sloppy).unwrap();

        assert_eq!(parsed["name"].as_str(), Some("John"));
        assert_eq!(parsed["age"].as_i64(), Some(30));
    }

    /// A payload wrapped in a ```json markdown fence is unwrapped and parsed.
    #[test]
    fn repairs_markdown_fenced_payload() {
        let fenced = "```json\n{\"entities\": [\"rust\", \"sqlite\"]}\n```";
        let parsed = repair_to_value(fenced).unwrap();

        let entities = &parsed["entities"];
        for (idx, expected) in ["rust", "sqlite"].into_iter().enumerate() {
            assert_eq!(entities[idx].as_str(), Some(expected));
        }
    }

    /// `llm_json` coerces instead of failing: free text is returned as a JSON
    /// string and empty input as an empty object, so `repair_to_value` yields
    /// `Ok` here and shape validation is left to the caller.
    #[test]
    fn coerces_non_json_text_into_a_value() {
        let free_text = "this is not json at all <<<";
        let coerced = repair_to_value(free_text).unwrap();
        assert_eq!(coerced, Value::from(free_text));

        let from_empty = repair_to_value("").unwrap();
        assert_eq!(from_empty, Value::Object(serde_json::Map::new()));
    }
}