Skip to main content

toml_repair/
lib.rs

1//! # toml-repair
2//!
3//! Repair messy TOML emitted by LLMs into something a real TOML parser
4//! will accept.
5//!
6//! Fixes applied:
7//!
8//! 1. Strip ```toml / ``` fences and surrounding prose.
9//! 2. Normalize CRLF and CR line endings to LF.
10//! 3. Trim trailing whitespace.
//! 4. Convert smart double quotes (`“ ”`) to ASCII `"` and smart single
//!    quotes (`‘ ’`) to ASCII `'`, wherever they appear in the text.
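//! 5. Strip trailing commas inside inline tables. TOML permits trailing
//!    commas in arrays only, so valid ones in arrays are left alone.
//!    NOTE(review): `repair` as written does not perform this step —
//!    confirm whether it is implemented elsewhere or still planned.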
14//!
15//! ## Example
16//!
17//! ```
18//! use toml_repair::repair;
19//! let raw = "```toml\nname = “Claude”\nversion = \"4.5\"\n```";
20//! let fixed = repair(raw);
21//! assert!(fixed.contains("name = \"Claude\""));
22//! ```
23
24#![deny(missing_docs)]
25
26/// Clean `raw` and return TOML-parser-ready text.
27pub fn repair(raw: &str) -> String {
28    let mut s = strip_fences(raw);
29    s = s.replace("\r\n", "\n").replace('\r', "\n");
30    s = unsmart_quotes(&s);
31    s = trim_trailing_ws(&s);
32    while s.ends_with('\n') {
33        s.pop();
34    }
35    s
36}
37
/// Return the body of the first Markdown-style fenced block in `s`.
///
/// The opening fence is the first "```" anywhere in `s`; the rest of that
/// line (for example a `toml` info string) is skipped. The closing fence is
/// the first "```" that sits at the very start of a later line. The returned
/// body keeps the newline that precedes the closing fence.
///
/// `s` is returned unchanged when it has no "```", when no newline follows
/// the opening fence, or when no closing fence is found.
fn strip_fences(s: &str) -> String {
    const FENCE: &str = "```";
    const FENCE_AT_LINE_START: &str = "\n```";

    let fenced = s
        .find(FENCE)
        .and_then(|open| {
            // The body starts on the line after the opening fence.
            let tail = &s[open + FENCE.len()..];
            tail.find('\n').map(|nl| &tail[nl + 1..])
        })
        .and_then(|body| {
            if body.starts_with(FENCE) {
                // The closing fence follows immediately: empty body.
                Some("")
            } else {
                // Keep everything up to and including the newline before the fence.
                body.find(FENCE_AT_LINE_START).map(|nl| &body[..=nl])
            }
        });

    fenced.unwrap_or(s).to_string()
}
67
/// Replace typographic quotes with their ASCII equivalents.
///
/// U+201C / U+201D become `"` and U+2018 / U+2019 become `'`. Every
/// occurrence in `s` is replaced regardless of position; no attempt is made
/// to tell string delimiters apart from string content.
fn unsmart_quotes(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            '\u{201C}' | '\u{201D}' => '"',
            '\u{2018}' | '\u{2019}' => '\'',
            other => other,
        })
        .collect()
}
74
/// Remove trailing spaces and tabs from every line of `s`.
///
/// Lines are delimited by `\n`. Newlines themselves are preserved, including
/// blank lines and the presence or absence of a final newline. Only `' '`
/// and `'\t'` are stripped; other whitespace characters are left in place.
fn trim_trailing_ws(s: &str) -> String {
    const BLANKS: &[char] = &[' ', '\t'];

    // Splitting on '\n' and re-joining with '\n' reproduces the newline
    // structure exactly.
    s.split('\n')
        .map(|line| line.trim_end_matches(BLANKS))
        .collect::<Vec<_>>()
        .join("\n")
}