1use anyhow::{Context, Result};
2use serde::Deserialize;
3use serde_json::Value;
4
/// Best-effort repair of "relaxed" JSON whose object keys and scalar values
/// were written without quotes, e.g. `{id: docs/foo.md, ok: true}`.
///
/// The input is scanned once, left to right:
/// * quoted strings are copied through verbatim (escape sequences included);
/// * structural characters and ASCII whitespace are copied through;
/// * a bare token in key position is wrapped in double quotes;
/// * a bare token in value position is kept as-is when it is `true`, `false`,
///   `null` or a number in JSON syntax, and wrapped in double quotes otherwise.
///
/// Wrapped tokens have `"`, `\` and control characters escaped so the emitted
/// string literal is well-formed. Once the outermost container that was opened
/// is closed again, the remainder of `input` is appended unchanged and the
/// function returns. Input that is already valid JSON comes back byte-for-byte
/// identical. The result is not guaranteed to be valid JSON; callers are
/// expected to attempt a parse and handle failure.
pub fn repair_unquoted_json(input: &str) -> String {
    /// Kind of container currently open.
    #[derive(Clone, Copy, PartialEq)]
    enum Container {
        Object,
        Array,
    }

    /// What a bare token at the current position should be treated as.
    #[derive(Clone, Copy, PartialEq)]
    enum Expecting {
        Key,
        Value,
        ArrayElement,
    }

    /// Appends `token` as a JSON string literal, escaping what JSON requires.
    fn push_quoted(out: &mut String, token: &str) {
        out.push('"');
        for ch in token.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
                c => out.push(c),
            }
        }
        out.push('"');
    }

    /// Returns the index of the first non-digit byte at or after `at`.
    fn skip_digits(bytes: &[u8], mut at: usize) -> usize {
        while at < bytes.len() && bytes[at].is_ascii_digit() {
            at += 1;
        }
        at
    }

    /// True when `token` matches the JSON number grammar exactly. Rust's float
    /// parser is more permissive (`inf`, `NaN`, `+1`, `.5`, `007`), and passing
    /// those through unquoted would produce invalid JSON.
    fn is_json_number(token: &str) -> bool {
        let bytes = token.as_bytes();
        let mut at = 0;
        if bytes.first() == Some(&b'-') {
            at = 1;
        }
        match bytes.get(at) {
            Some(b'0') => at += 1,
            Some(b'1'..=b'9') => at = skip_digits(bytes, at),
            _ => return false,
        }
        if bytes.get(at) == Some(&b'.') {
            let frac_end = skip_digits(bytes, at + 1);
            if frac_end == at + 1 {
                return false;
            }
            at = frac_end;
        }
        if matches!(bytes.get(at), Some(b'e') | Some(b'E')) {
            let mut exp_start = at + 1;
            if matches!(bytes.get(exp_start), Some(b'+') | Some(b'-')) {
                exp_start += 1;
            }
            let exp_end = skip_digits(bytes, exp_start);
            if exp_end == exp_start {
                return false;
            }
            at = exp_end;
        }
        at == bytes.len()
    }

    /// Bytes that terminate a bare key. Each of them has its own arm in the
    /// main loop, so a bare-key scan always consumes at least one byte.
    fn ends_bare_key(b: u8) -> bool {
        matches!(b, b':' | b',' | b'{' | b'}' | b'[' | b']' | b'"') || b.is_ascii_whitespace()
    }

    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut out = String::with_capacity(len + 64);
    let mut stack: Vec<Container> = Vec::new();
    let mut expecting = Expecting::Value;
    let mut ever_opened = false;
    let mut i = 0;

    while i < len {
        match bytes[i] {
            b'"' => {
                // Copy the whole quoted string as a slice so multi-byte UTF-8
                // sequences survive intact. Both delimiters are ASCII, so the
                // slice always ends on a character boundary.
                let start = i;
                i += 1;
                while i < len {
                    match bytes[i] {
                        b'\\' if i + 1 < len => i += 2,
                        b'"' => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
                out.push_str(&input[start..i]);
            }
            b'{' => {
                out.push('{');
                stack.push(Container::Object);
                expecting = Expecting::Key;
                ever_opened = true;
                i += 1;
            }
            b'[' => {
                out.push('[');
                stack.push(Container::Array);
                expecting = Expecting::ArrayElement;
                ever_opened = true;
                i += 1;
            }
            closer @ b'}' | closer @ b']' => {
                out.push(closer as char);
                stack.pop();
                if ever_opened && stack.is_empty() {
                    // The outermost container is complete: keep the rest as-is.
                    out.push_str(&input[i + 1..]);
                    return out;
                }
                i += 1;
            }
            b':' => {
                out.push(':');
                expecting = Expecting::Value;
                i += 1;
            }
            b',' => {
                out.push(',');
                expecting = match stack.last() {
                    Some(Container::Object) => Expecting::Key,
                    Some(Container::Array) => Expecting::ArrayElement,
                    None => Expecting::Value,
                };
                i += 1;
            }
            ws if ws.is_ascii_whitespace() => {
                out.push(ws as char);
                i += 1;
            }
            _ => {
                let start = i;
                if expecting == Expecting::Key {
                    // Take everything up to the next delimiter. Stopping at the
                    // first non-identifier byte instead would make no progress
                    // on keys such as `a.b` or `é` and spin forever.
                    while i < len && !ends_bare_key(bytes[i]) {
                        i += 1;
                    }
                    push_quoted(&mut out, &input[start..i]);
                } else {
                    // A bare value runs to the next `,`, `}` or `]`.
                    while i < len && !matches!(bytes[i], b',' | b'}' | b']') {
                        i += 1;
                    }
                    let token = input[start..i].trim();
                    if matches!(token, "true" | "false" | "null") || is_json_number(token) {
                        out.push_str(token);
                    } else {
                        push_quoted(&mut out, token);
                    }
                }
            }
        }
    }

    out
}
146
147pub fn extract_json_array(json_str: &str, path: &str) -> Result<Vec<Value>> {
158 if let Ok(root) = serde_json::from_str::<Value>(json_str) {
160 let target = resolve_path(&root, path)?;
161 return match target {
162 Value::Array(arr) => Ok(arr.clone()),
163 _ => anyhow::bail!("path '{}' does not point to an array", path),
164 };
165 }
166
167 if let Some(json_block) = extract_fenced_json(json_str) {
169 if let Ok(root) = serde_json::from_str::<Value>(&json_block) {
170 if let Ok(target) = resolve_path(&root, path) {
171 return match target {
172 Value::Array(arr) => Ok(arr.clone()),
173 _ => anyhow::bail!("path '{}' does not point to an array", path),
174 };
175 }
176 }
177 }
178
179 let repaired = repair_unquoted_json(json_str);
181 if repaired != json_str {
182 if let Ok(root) = serde_json::from_str::<Value>(&repaired) {
183 if let Ok(target) = resolve_path(&root, path) {
184 return match target {
185 Value::Array(arr) => Ok(arr.clone()),
186 _ => anyhow::bail!("path '{}' does not point to an array", path),
187 };
188 }
189 }
190 }
191
192 if let Some(arr) = scan_for_json_with_path(json_str, path) {
194 return Ok(arr);
195 }
196
197 anyhow::bail!("no valid JSON containing path '{}' found in text", path)
198}
199
/// Returns the trimmed contents of a Markdown code fence found in `text`.
///
/// Opening fences are tried in a fixed priority order (` ```json ` before a
/// bare ` ``` `, LF before CRLF). For each kind only its first occurrence is
/// considered, and it is skipped when no closing ` ``` ` follows it. Returns
/// `None` when no opening fence has a matching close.
fn extract_fenced_json(text: &str) -> Option<String> {
    const OPENING_FENCES: [&str; 4] = ["```json\n", "```json\r\n", "```\n", "```\r\n"];

    OPENING_FENCES.iter().find_map(|fence| {
        let body_start = text.find(fence)? + fence.len();
        let after_fence = &text[body_start..];
        let body_len = after_fence.find("```")?;
        Some(after_fence[..body_len].trim().to_string())
    })
}
214
215fn scan_for_json_with_path(text: &str, path: &str) -> Option<Vec<Value>> {
219 for (i, ch) in text.char_indices() {
220 if ch != '{' && ch != '[' {
221 continue;
222 }
223 let slice = &text[i..];
224 let mut de = serde_json::Deserializer::from_str(slice);
225 if let Ok(root) = <Value as Deserialize>::deserialize(&mut de) {
226 if let Ok(Value::Array(arr)) = resolve_path(&root, path) {
227 return Some(arr.clone());
228 }
229 }
230 let repaired = repair_unquoted_json(slice);
232 if repaired != slice {
233 let mut de = serde_json::Deserializer::from_str(&repaired);
234 if let Ok(root) = <Value as Deserialize>::deserialize(&mut de) {
235 if let Ok(Value::Array(arr)) = resolve_path(&root, path) {
236 return Some(arr.clone());
237 }
238 }
239 }
240 }
241 None
242}
243
244pub fn extract_field(value: &Value, path: &str) -> Option<String> {
249 let resolved = resolve_path(value, path).ok()?;
250 match resolved {
251 Value::String(s) => Some(s.clone()),
252 Value::Null => None,
253 other => Some(other.to_string()),
254 }
255}
256
257pub fn extract_stream_json_result(content: &str) -> Option<String> {
259 for line in content.lines().rev() {
260 let trimmed = line.trim();
261 if trimmed.is_empty() {
262 continue;
263 }
264 if trimmed.contains("\"type\":\"result\"") || trimmed.contains("\"type\": \"result\"") {
265 if let Ok(parsed) = serde_json::from_str::<Value>(trimmed) {
266 if let Some(result) = parsed.get("result").and_then(|v| v.as_str()) {
267 return Some(result.to_string());
268 }
269 }
270 if let Some(extracted) = extract_result_field_raw(trimmed) {
271 return Some(extracted);
272 }
273 }
274 }
275 None
276}
277
278fn resolve_path<'a>(root: &'a Value, path: &str) -> Result<&'a Value> {
279 if path == "$" {
280 return Ok(root);
281 }
282 let path = path.strip_prefix("$.").unwrap_or(path);
283 let mut current = root;
284 for segment in path.split('.') {
285 if segment.is_empty() {
286 continue;
287 }
288 current = current
289 .get(segment)
290 .with_context(|| format!("field '{}' not found", segment))?;
291 }
292 Ok(current)
293}
294
/// Extracts and unescapes the string value of the first `"result"` key in
/// `line` without parsing the line as JSON.
///
/// This is meant for lines that are not valid JSON as a whole (for example
/// because another field was replaced by a placeholder) but still carry a
/// well-formed `"result": "..."` member. Spaces or tabs around the colon are
/// accepted. Occurrences of `"result"` that are not followed by a colon and an
/// opening quote (such as the value of a `"type"` field) are skipped.
///
/// All JSON escapes are decoded, including `\uXXXX` and UTF-16 surrogate
/// pairs. An unpaired surrogate becomes U+FFFD. An unrecognized escape, or a
/// `\u` not followed by four hex digits, is kept literally.
///
/// Returns `None` when no such member exists or its string is unterminated.
fn extract_result_field_raw(line: &str) -> Option<String> {
    /// Consumes exactly four hex digits from `chars`, returning their value.
    /// Leaves `chars` untouched when the next four bytes are not hex digits.
    fn take_hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
        let rest = chars.as_str();
        // `get` returns None instead of panicking when byte 4 is not on a
        // character boundary or the input is too short.
        let digits = rest.get(..4)?;
        if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        let code = u32::from_str_radix(digits, 16).ok()?;
        *chars = rest[4..].chars();
        Some(code)
    }

    fn is_inline_space(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    const KEY: &str = "\"result\"";

    // Text immediately after the opening quote of the value.
    let body = line.match_indices(KEY).find_map(|(pos, key)| {
        line[pos + key.len()..]
            .trim_start_matches(is_inline_space)
            .strip_prefix(':')?
            .trim_start_matches(is_inline_space)
            .strip_prefix('"')
    })?;

    let mut chars = body.chars();
    let mut result = String::new();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => return Some(result),
            '\\' => {
                let escaped = match chars.next() {
                    Some(c) => c,
                    // A dangling backslash means the string never closes.
                    None => break,
                };
                match escaped {
                    '"' => result.push('"'),
                    '\\' => result.push('\\'),
                    '/' => result.push('/'),
                    'n' => result.push('\n'),
                    'r' => result.push('\r'),
                    't' => result.push('\t'),
                    'b' => result.push('\u{0008}'),
                    'f' => result.push('\u{000C}'),
                    'u' => match take_hex4(&mut chars) {
                        Some(high @ 0xD800..=0xDBFF) => {
                            // A high surrogate must be followed by `\uXXXX`
                            // holding the low half; look ahead without
                            // committing until the pair is confirmed.
                            let mut lookahead = chars.clone();
                            let low = if lookahead.next() == Some('\\')
                                && lookahead.next() == Some('u')
                            {
                                take_hex4(&mut lookahead)
                            } else {
                                None
                            };
                            match low {
                                Some(low @ 0xDC00..=0xDFFF) => {
                                    let code = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
                                    result.push(char::from_u32(code).unwrap_or('\u{FFFD}'));
                                    chars = lookahead;
                                }
                                _ => result.push('\u{FFFD}'),
                            }
                        }
                        Some(code) => result.push(char::from_u32(code).unwrap_or('\u{FFFD}')),
                        None => result.push_str("\\u"),
                    },
                    other => {
                        result.push('\\');
                        result.push(other);
                    }
                }
            }
            other => result.push(other),
        }
    }
    None
}
339
// Unit tests for the JSON extraction and repair helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // --- extract_json_array: input that is already valid JSON ---

    // A top-level field holding an array is returned element by element.
    #[test]
    fn extract_array_simple() {
        let json = r#"{"goals": ["a", "b", "c"]}"#;
        let arr = extract_json_array(json, "$.goals").expect("extract goals");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0], json!("a"));
    }

    // Dotted paths descend through nested objects.
    #[test]
    fn extract_array_nested() {
        let json = r#"{"result": {"items": [1, 2]}}"#;
        let arr = extract_json_array(json, "$.result.items").expect("extract nested");
        assert_eq!(arr.len(), 2);
    }

    // A path that resolves to a non-array value is an error.
    #[test]
    fn extract_array_not_array_fails() {
        let json = r#"{"goals": "not_array"}"#;
        let result = extract_json_array(json, "$.goals");
        assert!(result.is_err());
    }

    // A path naming a field that does not exist is an error.
    #[test]
    fn extract_array_missing_field_fails() {
        let json = r#"{"goals": []}"#;
        let result = extract_json_array(json, "$.missing");
        assert!(result.is_err());
    }

    // --- extract_field ---

    // String values come back without surrounding quotes.
    #[test]
    fn extract_field_string() {
        let value = json!({"name": "test", "score": 42});
        assert_eq!(extract_field(&value, "$.name"), Some("test".to_string()));
    }

    // Numbers are rendered as their JSON text.
    #[test]
    fn extract_field_number() {
        let value = json!({"score": 42});
        assert_eq!(extract_field(&value, "$.score"), Some("42".to_string()));
    }

    // Nested fields are reachable through a dotted path.
    #[test]
    fn extract_field_nested() {
        let value = json!({"meta": {"id": "abc"}});
        assert_eq!(extract_field(&value, "$.meta.id"), Some("abc".to_string()));
    }

    // An unresolvable path yields None rather than an error.
    #[test]
    fn extract_field_missing_returns_none() {
        let value = json!({"name": "test"});
        assert_eq!(extract_field(&value, "$.missing"), None);
    }

    // An explicit JSON null is treated the same as a missing field.
    #[test]
    fn extract_field_null_returns_none() {
        let value = json!({"name": null});
        assert_eq!(extract_field(&value, "$.name"), None);
    }

    // Booleans are rendered as their JSON text.
    #[test]
    fn extract_field_boolean() {
        let value = json!({"active": true});
        assert_eq!(extract_field(&value, "$.active"), Some("true".to_string()));
    }

    // --- extract_json_array: JSON embedded in surrounding text ---

    // Prose before the JSON object must not prevent extraction.
    #[test]
    fn extract_array_from_mixed_text_with_preamble() {
        let mixed = r#"Based on my analysis, I identified these targets:

{"regression_targets": [{"id": "target-a", "name": "A"}, {"id": "target-b", "name": "B"}]}"#;
        let arr = extract_json_array(mixed, "$.regression_targets")
            .expect("should extract from mixed text");
        assert_eq!(arr.len(), 2);
        assert_eq!(arr[0]["id"], json!("target-a"));
        assert_eq!(arr[1]["id"], json!("target-b"));
    }

    // JSON inside a fence tagged `json` is found.
    #[test]
    fn extract_array_from_fenced_code_block() {
        let fenced = r#"Here are the results:

```json
{"items": [{"id": "a"}, {"id": "b"}, {"id": "c"}]}
```

Done."#;
        let arr = extract_json_array(fenced, "$.items").expect("should extract from fenced block");
        assert_eq!(arr.len(), 3);
    }

    // JSON inside a fence without a language tag is found as well.
    #[test]
    fn extract_array_from_unfenced_code_block() {
        let fenced = r#"Results:

```
{"goals": ["x", "y"]}
```
"#;
        let arr =
            extract_json_array(fenced, "$.goals").expect("should extract from unfenced block");
        assert_eq!(arr.len(), 2);
    }

    // With several JSON objects in the text, the one containing the path is used.
    #[test]
    fn extract_array_multiple_json_objects_finds_correct_one() {
        let multi = r#"Summary: {"status": "ok", "count": 3}

Details:
{"regression_targets": [{"id": "rt-1"}, {"id": "rt-2"}]}

Footer: {"ts": "2026-01-01"}"#;
        let arr = extract_json_array(multi, "$.regression_targets")
            .expect("should find the correct JSON object");
        assert_eq!(arr.len(), 2);
        assert_eq!(arr[0]["id"], json!("rt-1"));
    }

    // Unquoted keys and values after a prose prefix are repaired and extracted.
    #[test]
    fn extract_array_unquoted_json_succeeds() {
        let input = "I found: {targets: [a, b]}";
        let arr = extract_json_array(input, "$.targets").expect("should repair and extract");
        assert_eq!(arr.len(), 2);
        assert_eq!(arr[0], json!("a"));
        assert_eq!(arr[1], json!("b"));
    }

    // Text with no JSON-like structure at all is an error.
    #[test]
    fn extract_array_truly_unparsable_fails() {
        let bad = "I found: <<<not json at all>>>";
        let result = extract_json_array(bad, "$.targets");
        assert!(result.is_err());
    }

    // Embedded JSON that lacks the requested path is an error.
    #[test]
    fn extract_array_no_matching_path_in_mixed_text_fails() {
        let mixed = r#"Some text {"other_field": [1, 2]}"#;
        let result = extract_json_array(mixed, "$.regression_targets");
        assert!(result.is_err());
    }

    // Plain, valid JSON keeps working alongside the fallback strategies.
    #[test]
    fn extract_array_pure_json_still_works() {
        let pure = r#"{"items": [{"id": "clean"}]}"#;
        let arr = extract_json_array(pure, "$.items").expect("pure JSON must work");
        assert_eq!(arr.len(), 1);
        assert_eq!(arr[0]["id"], json!("clean"));
    }

    // --- extract_stream_json_result ---

    // When several result lines exist, the last one in the stream wins.
    #[test]
    fn extract_stream_json_result_prefers_last_result_line() {
        let content = concat!(
            "{\"type\":\"result\",\"result\":\"{\\\"score\\\":1}\"}\n",
            "{\"type\":\"result\",\"result\":\"{\\\"score\\\":2}\"}\n"
        );

        assert_eq!(
            extract_stream_json_result(content),
            Some("{\"score\":2}".to_string())
        );
    }

    // The `[REDACTED]` placeholder makes the line invalid JSON; the result
    // string must still be recovered from it.
    #[test]
    fn extract_stream_json_result_handles_redacted_lines() {
        let content =
            "{\"type\":\"result\",\"cost_usd\":[REDACTED],\"result\":\"{\\\"score\\\":42}\"}";

        assert_eq!(
            extract_stream_json_result(content),
            Some("{\"score\":42}".to_string())
        );
    }

    // --- repair_unquoted_json ---

    // Bare keys and bare values (including ones containing slashes) get quoted.
    #[test]
    fn repair_unquoted_json_keys_and_values() {
        let input = r#"{id: docs/qa/foo.md, name: test}"#;
        let repaired = repair_unquoted_json(input);
        let parsed: Value = serde_json::from_str(&repaired).expect("should be valid JSON");
        assert_eq!(parsed["id"], json!("docs/qa/foo.md"));
        assert_eq!(parsed["name"], json!("test"));
    }

    // Repair works through an array of objects nested in an object.
    #[test]
    fn repair_unquoted_json_nested_array() {
        let input = r#"{items: [{id: a}, {id: b}]}"#;
        let repaired = repair_unquoted_json(input);
        let parsed: Value = serde_json::from_str(&repaired).expect("should be valid JSON");
        assert_eq!(parsed["items"].as_array().unwrap().len(), 2);
        assert_eq!(parsed["items"][0]["id"], json!("a"));
        assert_eq!(parsed["items"][1]["id"], json!("b"));
    }

    // Already-valid JSON must come back byte-for-byte unchanged.
    #[test]
    fn repair_unquoted_json_preserves_valid() {
        let input = r#"{"id":"a"}"#;
        let repaired = repair_unquoted_json(input);
        assert_eq!(repaired, input);
    }

    // Quoted and unquoted members may be mixed within one object.
    #[test]
    fn repair_unquoted_json_mixed_quoted() {
        let input = r#"{"id": "a", name: b}"#;
        let repaired = repair_unquoted_json(input);
        let parsed: Value = serde_json::from_str(&repaired).expect("should be valid JSON");
        assert_eq!(parsed["id"], json!("a"));
        assert_eq!(parsed["name"], json!("b"));
    }

    // Numbers, booleans and null keep their JSON types instead of becoming strings.
    #[test]
    fn repair_unquoted_json_numbers_bools_null() {
        let input = r#"{count: 42, ok: true, x: null}"#;
        let repaired = repair_unquoted_json(input);
        let parsed: Value = serde_json::from_str(&repaired).expect("should be valid JSON");
        assert_eq!(parsed["count"], json!(42));
        assert_eq!(parsed["ok"], json!(true));
        assert_eq!(parsed["x"], json!(null));
    }

    // A bare value containing slashes, dashes and dots is kept as one string.
    #[test]
    fn repair_unquoted_json_file_paths() {
        let input = r#"{id: docs/qa/orchestrator/02-cli-task-lifecycle.md}"#;
        let repaired = repair_unquoted_json(input);
        let parsed: Value = serde_json::from_str(&repaired).expect("should be valid JSON");
        assert_eq!(
            parsed["id"],
            json!("docs/qa/orchestrator/02-cli-task-lifecycle.md")
        );
    }

    // --- extract_json_array combined with repair ---

    // A fully unquoted document with five array entries is extracted end to end.
    #[test]
    fn extract_array_unquoted_regression_targets() {
        let input = r#"{regression_targets: [{id: docs/qa/foo.md, scope: unit}, {id: docs/qa/bar.md, scope: e2e}, {id: docs/qa/baz.md, scope: unit}, {id: docs/qa/qux.md, scope: integration}, {id: docs/qa/quux.md, scope: unit}]}"#;
        let arr = extract_json_array(input, "$.regression_targets")
            .expect("should extract unquoted regression targets");
        assert_eq!(arr.len(), 5);
        assert_eq!(arr[0]["id"], json!("docs/qa/foo.md"));
        assert_eq!(arr[2]["scope"], json!("unit"));
    }

    // Unquoted JSON with prose both before and after it is still extracted.
    #[test]
    fn extract_array_mixed_text_unquoted() {
        let input = r#"Based on my analysis, here are the targets:

{regression_targets: [{id: target-a, name: A}, {id: target-b, name: B}]}

That's all."#;
        let arr = extract_json_array(input, "$.regression_targets")
            .expect("should extract from mixed text with unquoted JSON");
        assert_eq!(arr.len(), 2);
        assert_eq!(arr[0]["id"], json!("target-a"));
        assert_eq!(arr[1]["name"], json!("B"));
    }
}