Skip to main content

llm_json_repair/
lib.rs

1//! # llm-json-repair
2//!
3//! Clean and parse JSON emitted by LLMs.
4//!
5//! Models still emit invalid JSON. They wrap it in markdown code fences,
6//! leave trailing commas, write prose around it, or use smart quotes. This
7//! crate does three local repair passes before you reach for a retry:
8//!
9//! 1. Strip markdown code fences (triple-backtick or triple-tilde).
10//! 2. Extract the first balanced `{...}` or `[...]` from surrounding prose.
11//! 3. Remove trailing commas before `}` or `]`.
12//!
13//! ```
14//! let raw = r#"Sure, here you go:
15//! ```json
16//! { "answer": "Paris", "confidence": 0.95, }
17//! ```"#;
18//! let cleaned = llm_json_repair::repair(raw);
19//! assert_eq!(cleaned, r#"{ "answer": "Paris", "confidence": 0.95 }"#);
20//! ```
21//!
22//! With the default `serde` feature you can parse straight to a typed value:
23//!
24//! ```
25//! # #[cfg(feature = "serde")]
26//! # {
27//! use serde::Deserialize;
28//!
29//! #[derive(Deserialize)]
30//! struct Answer { value: String }
31//!
32//! let raw = "```\n{\"value\": \"42\",}\n```";
33//! let parsed: Answer = llm_json_repair::parse(raw).unwrap();
34//! assert_eq!(parsed.value, "42");
35//! # }
36//! ```
37
38#![deny(missing_docs)]
39
40mod balanced;
41mod commas;
42mod fences;
43
44pub use balanced::extract_balanced;
45pub use commas::strip_trailing_commas;
46pub use fences::strip_fences;
47
48use thiserror::Error;
49
50/// Errors returned by [`parse`].
51#[derive(Debug, Error)]
52pub enum RepairError {
53    /// The input could not be parsed even after repair.
54    #[cfg(feature = "serde")]
55    #[error("could not parse JSON after repair: {source}")]
56    Parse {
57        /// Underlying serde_json error from the final parse attempt.
58        #[source]
59        source: serde_json::Error,
60        /// The repaired text we tried to parse.
61        repaired: String,
62    },
63
64    /// No balanced `{…}` or `[…]` found in the input.
65    #[error("no JSON object or array found in input")]
66    NoJsonFound,
67}
68
69/// Apply all three repair passes and return the cleaned string.
70///
71/// This always returns *something* — even if the input contains no JSON, you
72/// get the trimmed, fence-stripped text back. Use [`parse`] when you want
73/// validation against a real JSON parser.
74pub fn repair(input: &str) -> String {
75    let unfenced = strip_fences(input);
76    let extracted = extract_balanced(&unfenced)
77        .unwrap_or(unfenced.trim())
78        .to_string();
79    strip_trailing_commas(&extracted)
80}
81
/// Repair the input and parse it as JSON into the given type.
///
/// Runs the same three passes as [`repair`] (fence stripping, balanced-span
/// extraction, trailing-comma removal) and then deserializes the result with
/// `serde_json::from_str`. Unlike [`repair`], there is no fallback when the
/// extraction step finds nothing: the call fails early instead.
///
/// # Errors
///
/// - [`RepairError::NoJsonFound`] if [`extract_balanced`] returns `None` for
///   the fence-stripped input.
/// - [`RepairError::Parse`] if `serde_json` rejects the repaired text; the
///   variant carries both the underlying error and the text that was parsed.
#[cfg(feature = "serde")]
pub fn parse<T>(input: &str) -> Result<T, RepairError>
where
    T: serde::de::DeserializeOwned,
{
    let unfenced = strip_fences(input);
    // Keep the extracted span borrowed: it is only read by the comma pass, so
    // there is no need to copy it into its own `String` first.
    let extracted = extract_balanced(&unfenced).ok_or(RepairError::NoJsonFound)?;
    let cleaned = strip_trailing_commas(extracted);
    // `cleaned` is moved into the error on failure so callers can see exactly
    // what the parser was given.
    serde_json::from_str::<T>(&cleaned).map_err(|source| RepairError::Parse {
        source,
        repaired: cleaned,
    })
}
102
/// Deserialize `input` as-is, with no repair passes applied.
///
/// This is `serde_json::from_str` with the failure mapped into
/// [`RepairError::Parse`]. Because nothing is repaired here, the variant's
/// `repaired` field simply holds a copy of the original `input`.
///
/// Handy for recording whether a repair was actually needed:
///
/// ```ignore
/// let parsed = parse_strict(raw).or_else(|_| parse(raw))?;
/// ```
#[cfg(feature = "serde")]
pub fn parse_strict<T>(input: &str) -> Result<T, RepairError>
where
    T: serde::de::DeserializeOwned,
{
    match serde_json::from_str::<T>(input) {
        Ok(value) => Ok(value),
        Err(source) => Err(RepairError::Parse {
            source,
            repaired: input.to_string(),
        }),
    }
}