Skip to main content

yaml_repair/
lib.rs

1//! # yaml-repair
2//!
3//! Repair messy YAML emitted by LLMs into something a real YAML parser
4//! will accept.
5//!
6//! Fixes applied:
7//!
8//! 1. Strip ```yaml / ``` fences and surrounding prose.
9//! 2. Normalize CRLF and CR line endings to LF.
10//! 3. Convert leading tabs to 2 spaces (YAML forbids leading tabs in
11//!    indentation).
12//! 4. Dedent so the shallowest non-empty line is at column 0.
13//! 5. Trim trailing whitespace on each line.
14//!
15//! ## Example
16//!
17//! ```
18//! use yaml_repair::repair;
19//! let raw = "Sure:\n```yaml\n  name: Claude\n  tools:\n    - read\n    - write\n```";
20//! let fixed = repair(raw);
21//! assert!(fixed.starts_with("name: Claude"));
22//! assert!(!fixed.contains("```"));
23//! ```
24
25#![deny(missing_docs)]
26
27/// Clean `raw` and return YAML-parser-ready text.
28pub fn repair(raw: &str) -> String {
29    let mut s = strip_fences(raw);
30    s = normalize_line_endings(&s);
31    s = tabs_to_spaces(&s);
32    s = trim_trailing_ws(&s);
33    s = dedent(&s);
34    while s.ends_with('\n') {
35        s.pop();
36    }
37    s
38}
39
/// Extract the body of the first Markdown code fence in `s`.
///
/// The opening fence is the first occurrence of three backticks anywhere in
/// the text; the rest of that line (e.g. a `yaml` info string) is skipped.
/// The closing fence is the next line that *starts* with three backticks, so
/// indented backticks inside the body (such as a fenced snippet nested in a
/// YAML block scalar) do not end the block.
///
/// Returns:
/// * the text between the two fence lines when the fence is closed;
/// * the text after the opening fence line when the fence is never closed but
///   the opening fence starts its own line (only spaces/tabs before it) — this
///   is what truncated output looks like;
/// * `s` unchanged when there are no backtick fences, or when unclosed
///   backticks appear in the middle of a line.
///
/// Only `\n` is treated as a line terminator.
fn strip_fences(s: &str) -> String {
    const FENCE: &str = "```";

    let open = match s.find(FENCE) {
        Some(pos) => pos,
        None => return s.to_string(),
    };

    // Does the opening fence begin its line (ignoring indentation)? This
    // separates a real fence line from stray backticks inside other text.
    let line_start = s[..open].rfind('\n').map_or(0, |p| p + 1);
    let opens_line = s[line_start..open]
        .chars()
        .all(|c| c == ' ' || c == '\t');

    // The body starts on the line after the opening fence.
    let body = match s[open..].find('\n') {
        Some(rel) => &s[open + rel + 1..],
        // The fence line is the last line, so the body is empty.
        None => "",
    };

    // Walk the body line by line, tracking the byte offset of each line start,
    // and stop at the first line that begins with a fence.
    let mut offset = 0;
    for line in body.split_inclusive('\n') {
        if line.starts_with(FENCE) {
            return body[..offset].to_string();
        }
        offset += line.len();
    }

    // No closing fence was found.
    if opens_line {
        body.to_string()
    } else {
        s.to_string()
    }
}
70
/// Rewrite every CRLF pair and every lone CR in `s` as a single LF.
fn normalize_line_endings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars().peekable();
    while let Some(ch) = rest.next() {
        if ch != '\r' {
            out.push(ch);
            continue;
        }
        // A CR always yields one LF; if it was the first half of a CRLF pair,
        // swallow the LF so the pair collapses to a single newline.
        if rest.peek() == Some(&'\n') {
            rest.next();
        }
        out.push('\n');
    }
    out
}
74
/// Replace each tab in a line's leading whitespace with two spaces.
///
/// The leading whitespace of a line is its run of spaces and tabs before the
/// first other character; tabs after that point are kept as they are. Lines
/// are delimited by `\n`, which is preserved.
fn tabs_to_spaces(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for line in s.split_inclusive('\n') {
        // Byte offset where the indentation ends. For a whitespace-only line
        // the whole line (including its newline) counts as indentation.
        let indent_end = line
            .find(|c: char| !matches!(c, ' ' | '\t' | '\n'))
            .unwrap_or(line.len());
        let (indent, remainder) = line.split_at(indent_end);

        for c in indent.chars() {
            match c {
                '\t' => out.push_str("  "),
                other => out.push(other),
            }
        }
        out.push_str(remainder);
    }
    out
}
94
/// Remove trailing spaces and tabs from every line of `s`.
///
/// Lines are delimited by `\n`. Each newline is kept, and a final line
/// without a newline stays without one.
fn trim_trailing_ws(s: &str) -> String {
    let is_blank = |c: char| matches!(c, ' ' | '\t');

    let mut out = String::with_capacity(s.len());
    for line in s.split_inclusive('\n') {
        match line.strip_suffix('\n') {
            Some(content) => {
                out.push_str(content.trim_end_matches(is_blank));
                out.push('\n');
            }
            None => out.push_str(line.trim_end_matches(is_blank)),
        }
    }
    out
}
108
/// Remove the common leading-space indentation from every line of `s`.
///
/// The common indentation is the smallest count of leading ASCII spaces over
/// all lines that are not blank (blank meaning empty after `str::trim`). When
/// it is zero, `s` is returned unchanged.
///
/// Otherwise each line loses up to that many leading spaces. Only real
/// leading spaces are removed, so a line made of other whitespace (for
/// example a non-breaking space) is never sliced inside a multi-byte
/// character. The lines are then re-joined with `\n`, which means a trailing
/// newline in `s` is not carried over in this case.
fn dedent(s: &str) -> String {
    /// Number of ASCII spaces at the start of `line`.
    fn leading_spaces(line: &str) -> usize {
        line.bytes().take_while(|&b| b == b' ').count()
    }

    let min_indent = s
        .lines()
        .filter(|l| !l.trim().is_empty())
        .map(leading_spaces)
        .min()
        .unwrap_or(0);
    if min_indent == 0 {
        return s.to_string();
    }

    s.lines()
        // The cut point sits just after a run of ASCII spaces, so it is
        // always a valid char boundary.
        .map(|l| &l[leading_spaces(l).min(min_indent)..])
        .collect::<Vec<_>>()
        .join("\n")
}
125            }
126        })
127        .collect::<Vec<_>>()
128        .join("\n")
129}