Skip to main content

lean_ctx/core/
jsonc.rs

1use serde_json::Value;
2
3/// Strip `//` line comments, `/* */` block comments and trailing commas from
4/// JSONC, then parse with serde_json. String contents are preserved verbatim.
5///
6/// This makes lean-ctx tolerant of the JSONC dialect that editors like VS Code
7/// use for `settings.json` / `mcp.json` (comments + trailing commas are valid
8/// there but rejected by strict JSON). See issue #311.
9pub fn parse_jsonc(input: &str) -> Result<Value, serde_json::Error> {
10    let stripped = strip_json_comments(input);
11    let cleaned = strip_trailing_commas(&stripped);
12    serde_json::from_str(&cleaned)
13}
14
/// Remove `//` line comments and `/* */` block comments from JSONC text.
///
/// String literals are skipped as a unit (honouring backslash escapes), so
/// comment-like sequences inside them are kept verbatim. A line comment is
/// removed up to, but not including, the terminating `\n`. A block comment is
/// removed including its closing `*/`; if the closing `*/` is missing, the
/// comment is treated as running to the end of the input.
///
/// The output is assembled by copying slices of `input` between comments.
/// Every slice boundary sits next to an ASCII byte (`/`, `*`, `\n`) or at the
/// end of the input, so multi-byte UTF-8 sequences are never split.
fn strip_json_comments(input: &str) -> String {
    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut out = String::with_capacity(len);
    // `i` is the scan cursor; `seg` is the start of the pending slice that
    // still has to be copied to `out`.
    let mut i = 0;
    let mut seg = 0;

    while i < len {
        match bytes[i] {
            b'"' => {
                // Skip the whole string literal; it stays inside the pending
                // slice and is copied verbatim later.
                i += 1;
                while i < len {
                    match bytes[i] {
                        // An escape consumes the following byte as well.
                        b'\\' => i = (i + 2).min(len),
                        b'"' => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
            }
            b'/' if bytes.get(i + 1) == Some(&b'/') => {
                out.push_str(&input[seg..i]);
                // Keep the newline itself so line structure is preserved.
                i = bytes[i..]
                    .iter()
                    .position(|&c| c == b'\n')
                    .map_or(len, |off| i + off);
                seg = i;
            }
            b'/' if bytes.get(i + 1) == Some(&b'*') => {
                out.push_str(&input[seg..i]);
                // `i + 2` is a char boundary because both `/` and `*` are
                // ASCII. Searching after the opener means `/*/` is not
                // mistaken for a complete comment. Without a closer, the
                // comment swallows the rest of the input; stopping one byte
                // short would leak that byte and could split a UTF-8 sequence.
                i = input[i + 2..]
                    .find("*/")
                    .map_or(len, |off| i + 2 + off + 2);
                seg = i;
            }
            _ => i += 1,
        }
    }

    out.push_str(&input[seg..]);
    out
}
70
/// Remove trailing commas that appear before a closing `}` or `]`.
///
/// A comma is dropped only when it is a genuine trailing comma: the next
/// non-whitespace byte is `}` or `]` *and* the comma follows a value. A lone
/// comma in an otherwise empty container (`[,]`, `{ , }`) is left in place so
/// that the JSON parser still rejects it.
///
/// String contents are preserved verbatim (commas inside strings are ignored).
///
/// Operates on already comment-stripped input. Uses byte-segment copying so
/// multi-byte UTF-8 sequences are never split (all decision bytes are ASCII).
fn strip_trailing_commas(input: &str) -> String {
    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut out = String::with_capacity(len);
    // `pos` is the scan cursor; `kept_from` is the start of the pending slice
    // that still has to be copied to `out`.
    let mut pos = 0;
    let mut kept_from = 0;
    // Last non-whitespace byte seen outside strings; a string literal is
    // recorded as its closing quote. Dropped commas are not recorded.
    let mut prev: Option<u8> = None;

    while pos < len {
        let byte = bytes[pos];

        if byte == b'"' {
            // Skip the whole string literal, honouring backslash escapes.
            pos += 1;
            while pos < len {
                match bytes[pos] {
                    b'\\' => pos = (pos + 2).min(len),
                    b'"' => {
                        pos += 1;
                        break;
                    }
                    _ => pos += 1,
                }
            }
            prev = Some(b'"');
            continue;
        }

        if byte == b',' {
            // No value precedes the comma at start of input, right after an
            // opening bracket, or right after another comma.
            let follows_value =
                !matches!(prev, None | Some(b'[') | Some(b'{') | Some(b','));
            let next = bytes[pos + 1..]
                .iter()
                .copied()
                .find(|c| !c.is_ascii_whitespace());
            let before_closer = matches!(next, Some(b'}') | Some(b']'));

            if follows_value && before_closer {
                out.push_str(&input[kept_from..pos]);
                pos += 1;
                kept_from = pos;
                continue;
            }
        }

        if !byte.is_ascii_whitespace() {
            prev = Some(byte);
        }
        pos += 1;
    }

    out.push_str(&input[kept_from..]);
    out
}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` as JSONC, panicking (at the caller's location) when the
    /// document is rejected.
    #[track_caller]
    fn parse_ok(src: &str) -> serde_json::Value {
        parse_jsonc(src).unwrap()
    }

    // --- comment stripping ---

    /// A `//` comment on its own line is ignored.
    #[test]
    fn strips_line_comments() {
        let doc = parse_ok(
            r#"{
  // this is a comment
  "key": "value"
}"#,
        );
        assert_eq!(doc["key"], "value");
    }

    /// A `/* */` comment spanning several lines is ignored.
    #[test]
    fn strips_block_comments() {
        let doc = parse_ok(
            r#"{
  /* block
     comment */
  "key": "value"
}"#,
        );
        assert_eq!(doc["key"], "value");
    }

    /// The `//` inside a URL string is not a comment.
    #[test]
    fn preserves_slashes_in_strings() {
        let doc = parse_ok(r#"{"url": "https://example.com/path"}"#);
        assert_eq!(doc["url"], "https://example.com/path");
    }

    /// Comment markers inside string values survive untouched.
    #[test]
    fn preserves_comment_like_content_in_strings() {
        let doc = parse_ok(r#"{"note": "see // inline", "code": "/* not a comment */"}"#);
        assert_eq!(doc["note"], "see // inline");
        assert_eq!(doc["code"], "/* not a comment */");
    }

    /// An escaped quote does not end the string early.
    #[test]
    fn handles_escaped_quotes_in_strings() {
        let doc = parse_ok(r#"{"msg": "say \"hello\" // world"}"#);
        assert_eq!(doc["msg"], r#"say "hello" // world"#);
    }

    /// A comment between two entries leaves the separating comma intact.
    #[test]
    fn handles_trailing_comma_free_json() {
        let doc = parse_ok(
            r#"{
  "a": 1,
  // comment between entries
  "b": 2
}"#,
        );
        assert_eq!(doc["a"], 1);
        assert_eq!(doc["b"], 2);
    }

    /// Empty input is not a JSON document.
    #[test]
    fn empty_input() {
        let outcome = parse_jsonc("");
        assert!(outcome.is_err());
    }

    // --- #311: trailing comma support (VS Code / JSONC dialect) ---

    #[test]
    fn strips_trailing_comma_in_object() {
        let doc = parse_ok(
            r#"{
  "a": 1,
  "b": 2,
}"#,
        );
        assert_eq!(doc["a"], 1);
        assert_eq!(doc["b"], 2);
    }

    #[test]
    fn strips_trailing_comma_in_array() {
        let doc = parse_ok(r#"{"list": [1, 2, 3,]}"#);
        assert_eq!(doc["list"][2], 3);
    }

    #[test]
    fn strips_trailing_comma_with_whitespace_and_newlines() {
        let doc = parse_ok("{\n  \"a\": 1  ,\n\n}");
        assert_eq!(doc["a"], 1);
    }

    #[test]
    fn strips_nested_trailing_commas() {
        let doc = parse_ok(
            r#"{
  "outer": {
    "inner": [
      "x",
      "y",
    ],
  },
}"#,
        );
        assert_eq!(doc["outer"]["inner"][1], "y");
    }

    #[test]
    fn preserves_comma_inside_string_before_brace() {
        // The comma lives inside the string value, so it is not a trailing
        // comma and must be kept.
        let doc = parse_ok(r#"{"msg": "hello, world"}"#);
        assert_eq!(doc["msg"], "hello, world");
    }

    #[test]
    fn issue_311_vscode_settings_with_trailing_comma_and_comments() {
        // Shaped like the VS Code user settings.json behind #311, which
        // failed with "trailing comma at line 4 column 5" and also carried
        // JSONC comments.
        let doc = parse_ok(
            r#"{
  // editor settings
  "editor.fontSize": 14,
  "editor.tabSize": 2,
  "chat.mcp.enabled": true,
}"#,
        );
        assert_eq!(doc["editor.fontSize"], 14);
        assert_eq!(doc["chat.mcp.enabled"].as_bool(), Some(true));
    }

    /// Strict JSON goes through unchanged.
    #[test]
    fn pure_json_passthrough() {
        let doc = parse_ok(r#"{"key": "value", "num": 42}"#);
        assert_eq!(doc["key"], "value");
        assert_eq!(doc["num"], 42);
    }

    #[test]
    fn real_opencode_config_with_comments() {
        let doc = parse_ok(
            r#"{
  // OpenCode configuration
  "$schema": "https://opencode.ai/config.json",
  "mcp": {
    /* existing tool */
    "my-tool": {
      "type": "local",
      "command": ["my-tool"],
      "enabled": true
    }
  }
}"#,
        );
        assert_eq!(doc["$schema"], "https://opencode.ai/config.json");
        assert_eq!(doc["mcp"]["my-tool"]["enabled"].as_bool(), Some(true));
    }

    // --- multi-byte UTF-8 handling ---

    #[test]
    fn utf8_umlauts_preserved() {
        let doc =
            parse_ok("{\n  // German names\n  \"name\": \"Müller\",\n  \"city\": \"Zürich\"\n}");
        assert_eq!(doc["name"], "Müller");
        assert_eq!(doc["city"], "Zürich");
    }

    #[test]
    fn utf8_cjk_with_block_comment() {
        let doc = parse_ok("{\n  /* 日本語コメント */\n  \"desc\": \"日本語テスト\"\n}");
        assert_eq!(doc["desc"], "日本語テスト");
    }

    #[test]
    fn utf8_emoji_between_comments() {
        let doc = parse_ok(
            "{\n  // before\n  \"icon\": \"🚀🔥\",\n  /* after */\n  \"ok\": true\n}",
        );
        assert_eq!(doc["icon"], "🚀🔥");
        assert_eq!(doc["ok"].as_bool(), Some(true));
    }

    #[test]
    fn utf8_in_comment_stripped_cleanly() {
        let doc = parse_ok("{\n  // Achtung: ä ö ü ß\n  \"key\": \"value\"\n}");
        assert_eq!(doc["key"], "value");
    }

    #[test]
    fn utf8_in_key() {
        let doc = parse_ok("{\"straße\": \"Hauptstraße 42\"}");
        assert_eq!(doc["straße"], "Hauptstraße 42");
    }

    #[test]
    fn mixed_ascii_and_utf8_values() {
        let doc = parse_ok(
            "{\n  // config\n  \"en\": \"hello\",\n  \"ru\": \"привет\",\n  \"jp\": \"こんにちは\"\n}",
        );
        assert_eq!(doc["en"], "hello");
        assert_eq!(doc["ru"], "привет");
        assert_eq!(doc["jp"], "こんにちは");
    }

    /// `\uXXXX` escapes are left for serde_json to decode.
    #[test]
    fn escaped_unicode_unchanged() {
        let doc = parse_ok(r#"{"test": "\u00e4\u00f6\u00fc"}"#);
        assert_eq!(doc["test"], "\u{00e4}\u{00f6}\u{00fc}");
    }

    /// A comment that starts directly after a multi-byte string and itself
    /// contains multi-byte text.
    #[test]
    fn utf8_at_comment_boundary() {
        let doc = parse_ok("{\n  \"before\": \"текст\"// комментарий\n, \"after\": 1\n}");
        assert_eq!(doc["before"], "текст");
        assert_eq!(doc["after"], 1);
    }

    #[test]
    fn empty_string_after_utf8_comment() {
        let doc = parse_ok("{\n  // Ü\n  \"key\": \"\"\n}");
        assert_eq!(doc["key"], "");
    }

    #[test]
    fn real_claude_settings_with_german_paths() {
        let doc = parse_ok(
            r#"{
  // Claude Code Einstellungen
  "mcpServers": {
    /* Lean-CTX Konfiguration für /Users/müller/Projekte */
    "lean-ctx": {
      "command": "/Users/müller/.local/bin/lean-ctx",
      "args": ["--project", "/Users/müller/Projekte/größtes-projekt"]
    }
  }
}"#,
        );
        let server = &doc["mcpServers"]["lean-ctx"];
        assert_eq!(server["command"], "/Users/müller/.local/bin/lean-ctx");
        let cli_args = server["args"].as_array().unwrap();
        assert_eq!(cli_args[1], "/Users/müller/Projekte/größtes-projekt");
    }
}