zeph_core/agent/speculative/
partial_json.rs1#![allow(dead_code)]
30
31use serde_json::Map;
32
33#[derive(Debug, Clone, PartialEq)]
35pub enum PrefixState {
36 Incomplete,
38 ValidPrefix {
41 known_leaves: Map<String, serde_json::Value>,
43 missing_required: Vec<String>,
45 },
46 Malformed,
48}
49
/// Lexical positions inside a top-level JSON object.
///
/// NOTE(review): nothing in the visible part of this file constructs or matches
/// on this type — presumably it is kept for a state-machine scanner; confirm
/// before removing.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Ctx {
    /// Directly inside the outermost object.
    TopObject,
    /// Inside a member name.
    InKey,
    /// Past a member name, before its value.
    AfterKey,
    /// Inside a member value.
    InValue,
    /// Inside a string-typed member value.
    InStringValue,
    /// Inside a nested container, `depth` levels deep.
    InNestedValue { depth: u32 },
}
59
60pub struct PartialJsonParser {
78 buf: String,
79 required: Vec<String>,
80 known_cache: Map<String, serde_json::Value>,
82 scan_watermark: usize,
86}
87
88impl PartialJsonParser {
89 #[must_use]
92 pub fn new() -> Self {
93 Self {
94 buf: String::new(),
95 required: Vec::new(),
96 known_cache: Map::new(),
97 scan_watermark: 0,
98 }
99 }
100
101 pub fn set_required(&mut self, required: Vec<String>) {
106 self.required = required;
107 }
108
109 pub fn push(&mut self, delta: &str) -> PrefixState {
114 self.buf.push_str(delta);
115 self.scan()
116 }
117
118 pub fn reset(&mut self) {
120 self.buf.clear();
121 self.known_cache.clear();
122 self.scan_watermark = 0;
123 }
124
125 fn scan(&mut self) -> PrefixState {
131 let bytes = self.buf.as_bytes();
132 let len = bytes.len();
133
134 let mut i = if self.scan_watermark == 0 {
136 let start = skip_ws(bytes, 0);
137 if start >= len || bytes[start] != b'{' {
138 return if self.buf.trim().is_empty() {
139 PrefixState::Incomplete
140 } else {
141 PrefixState::Malformed
142 };
143 }
144 start + 1 } else {
146 self.scan_watermark
147 };
148
149 let mut known = self.known_cache.clone();
151
152 loop {
153 i = skip_ws(bytes, i);
154 if i >= len {
155 break; }
157
158 if bytes[i] == b'}' {
160 self.scan_watermark = i + 1;
161 let missing = self.missing(&known);
162 self.known_cache.clone_from(&known);
163 return PrefixState::ValidPrefix {
164 known_leaves: known,
165 missing_required: missing,
166 };
167 }
168
169 if bytes[i] == b',' {
171 i += 1;
172 i = skip_ws(bytes, i);
173 if i >= len {
174 break;
175 }
176 }
177
178 if bytes[i] != b'"' {
180 return PrefixState::Malformed;
181 }
182 let Some((key, after_key)) = read_string(bytes, i) else {
183 break; };
185 i = after_key;
186 i = skip_ws(bytes, i);
187
188 if i >= len {
189 break;
190 }
191 if bytes[i] != b':' {
192 return PrefixState::Malformed;
193 }
194 i += 1; i = skip_ws(bytes, i);
196
197 if i >= len {
198 break; }
200
201 match read_value(bytes, i) {
203 ReadValue::Complete(value, end) => {
204 known.insert(key, value);
205 self.scan_watermark = end;
207 self.known_cache.clone_from(&known);
208 i = end;
209 }
210 ReadValue::Incomplete => break,
211 ReadValue::Malformed => return PrefixState::Malformed,
212 }
213 }
214
215 let missing = self.missing(&known);
217 PrefixState::ValidPrefix {
218 known_leaves: known,
219 missing_required: missing,
220 }
221 }
222
223 fn missing(&self, known: &Map<String, serde_json::Value>) -> Vec<String> {
224 self.required
225 .iter()
226 .filter(|k| !known.contains_key(k.as_str()))
227 .cloned()
228 .collect()
229 }
230}
231
232impl Default for PartialJsonParser {
233 fn default() -> Self {
234 Self::new()
235 }
236}
237
/// Returns the index of the first byte at or after `i` that is not a space,
/// tab, carriage return or line feed.
///
/// Yields `bytes.len()` when only whitespace remains, and `i` unchanged when
/// `i` is already at or past the end of `bytes`.
fn skip_ws(bytes: &[u8], i: usize) -> usize {
    let blanks = bytes.get(i..).map_or(0, |rest| {
        rest.iter()
            .take_while(|b| matches!(**b, b' ' | b'\t' | b'\r' | b'\n'))
            .count()
    });
    i + blanks
}
246
247fn read_string(bytes: &[u8], start: usize) -> Option<(String, usize)> {
253 debug_assert_eq!(bytes[start], b'"');
254 let mut i = start + 1;
255 let mut escape = false;
256 while i < bytes.len() {
257 let b = bytes[i];
258 if escape {
259 escape = false;
260 } else if b == b'\\' {
261 escape = true;
262 } else if b == b'"' {
263 let content = std::str::from_utf8(&bytes[start + 1..i]).ok()?;
265 let json_str = [b"\"", &bytes[start + 1..i], b"\""].concat();
268 let decoded: String =
269 serde_json::from_slice(&json_str).unwrap_or_else(|_| content.to_owned());
270 return Some((decoded, i + 1));
271 }
272 i += 1;
273 }
274 None }
276
277enum ReadValue {
278 Complete(serde_json::Value, usize),
279 Incomplete,
280 Malformed,
281}
282
283fn read_value(bytes: &[u8], i: usize) -> ReadValue {
285 if i >= bytes.len() {
286 return ReadValue::Incomplete;
287 }
288 match bytes[i] {
289 b'"' => match read_string(bytes, i) {
290 Some((s, end)) => ReadValue::Complete(serde_json::Value::String(s), end),
291 None => ReadValue::Incomplete,
292 },
293 b'{' | b'[' => read_nested(bytes, i),
294 b't' => read_literal(bytes, i, b"true", serde_json::Value::Bool(true)),
295 b'f' => read_literal(bytes, i, b"false", serde_json::Value::Bool(false)),
296 b'n' => read_literal(bytes, i, b"null", serde_json::Value::Null),
297 b'-' | b'0'..=b'9' => read_number(bytes, i),
298 _ => ReadValue::Malformed,
299 }
300}
301
302fn read_literal(bytes: &[u8], i: usize, lit: &[u8], val: serde_json::Value) -> ReadValue {
303 if bytes.len() < i + lit.len() {
304 return ReadValue::Incomplete;
305 }
306 if &bytes[i..i + lit.len()] == lit {
307 ReadValue::Complete(val, i + lit.len())
308 } else {
309 ReadValue::Malformed
310 }
311}
312
313fn read_number(bytes: &[u8], mut i: usize) -> ReadValue {
314 let start = i;
315 if i < bytes.len() && bytes[i] == b'-' {
316 i += 1;
317 }
318 while i < bytes.len() && (bytes[i].is_ascii_digit() || bytes[i] == b'.') {
319 i += 1;
320 }
321 if i < bytes.len() && matches!(bytes[i], b'e' | b'E') {
323 i += 1;
324 if i < bytes.len() && matches!(bytes[i], b'+' | b'-') {
325 i += 1;
326 }
327 while i < bytes.len() && bytes[i].is_ascii_digit() {
328 i += 1;
329 }
330 }
331 if i == start {
332 return ReadValue::Malformed;
333 }
334 if i < bytes.len() && !matches!(bytes[i], b',' | b'}' | b']' | b' ' | b'\t' | b'\r' | b'\n') {
336 return ReadValue::Incomplete;
337 }
338 let s = std::str::from_utf8(&bytes[start..i]).unwrap_or("");
339 match serde_json::from_str::<serde_json::Value>(s) {
340 Ok(v) => ReadValue::Complete(v, i),
341 Err(_) => ReadValue::Malformed,
342 }
343}
344
345fn read_nested(bytes: &[u8], start: usize) -> ReadValue {
347 let open = bytes[start];
348 let close = if open == b'{' { b'}' } else { b']' };
349 let mut depth = 1u32;
350 let mut i = start + 1;
351 let mut in_string = false;
352 let mut escape = false;
353
354 while i < bytes.len() {
355 let b = bytes[i];
356 if escape {
357 escape = false;
358 } else if in_string {
359 if b == b'\\' {
360 escape = true;
361 } else if b == b'"' {
362 in_string = false;
363 }
364 } else if b == b'"' {
365 in_string = true;
366 } else if b == open {
367 depth += 1;
368 } else if b == close {
369 depth -= 1;
370 if depth == 0 {
371 let parsed = std::str::from_utf8(&bytes[start..=i])
373 .ok()
374 .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok());
375 return match parsed {
376 Some(v) => ReadValue::Complete(v, i + 1),
377 None => ReadValue::Malformed,
378 };
379 }
380 }
381 i += 1;
382 }
383 ReadValue::Incomplete
384}
385
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds every fragment in order and returns the state reported for the last one.
    fn push_all(p: &mut PartialJsonParser, parts: &[&str]) -> PrefixState {
        parts
            .iter()
            .fold(PrefixState::Incomplete, |_, part| p.push(part))
    }

    /// Unpacks a `ValidPrefix` into `(known_leaves, missing_required)`, panicking otherwise.
    fn expect_valid(
        state: PrefixState,
    ) -> (serde_json::Map<String, serde_json::Value>, Vec<String>) {
        match state {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => (known_leaves, missing_required),
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }

    /// A string value split across two fragments is reassembled.
    #[test]
    fn fixture_simple_command_two_deltas() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["command".into()]);
        p.push(r#"{"command": "ls "#);

        let (known_leaves, missing_required) = expect_valid(p.push(r#"-la"}"#));
        assert!(missing_required.is_empty());
        let v = known_leaves["command"].as_str().unwrap();
        assert!(v.contains("ls") && v.contains("la"), "got: {v}");
    }

    /// Two members delivered over three fragments are both recognised.
    #[test]
    fn fixture_multi_field_incremental() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["path".into(), "content".into()]);
        let fragments = [
            r#"{"path": "/tmp/f"#,
            r#"oo.txt", "content": "hel"#,
            r#"lo world"}"#,
        ];

        let (known_leaves, missing_required) = expect_valid(push_all(&mut p, &fragments));
        assert!(missing_required.is_empty(), "missing: {missing_required:?}");
        assert!(known_leaves.contains_key("path"));
        assert!(known_leaves.contains_key("content"));
    }

    /// Escaped quotes inside a string do not terminate it.
    #[test]
    fn fixture_escape_in_string() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["msg".into()]);

        let (known_leaves, missing_required) =
            expect_valid(p.push(r#"{"msg": "say \"hello\""}"#));
        assert!(missing_required.is_empty());
        let v = known_leaves["msg"].as_str().unwrap();
        assert!(v.contains("hello"), "got: {v}");
    }

    /// An object without its closing brace is tolerated, then completed.
    #[test]
    fn fixture_incomplete_then_resolved() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["x".into()]);

        let mid = p.push(r#"{"x": 42"#);
        // `Incomplete` is acceptable here; only `Malformed` is a failure.
        if let PrefixState::ValidPrefix {
            known_leaves,
            missing_required,
        } = &mid
        {
            assert!(missing_required.is_empty());
            assert_eq!(known_leaves["x"], 42);
        } else if let other @ PrefixState::Malformed = &mid {
            panic!("unexpected: {other:?}");
        }

        let done = p.push("}");
        assert!(matches!(done, PrefixState::ValidPrefix { .. }));
    }

    /// Input that does not start with `{` is rejected.
    #[test]
    fn fixture_malformed_input() {
        let mut p = PartialJsonParser::new();
        assert!(matches!(p.push("not-json"), PrefixState::Malformed));
    }

    /// An array-typed member value is captured as an array.
    #[test]
    fn fixture_top_level_array_value() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["items".into()]);

        let (known_leaves, missing_required) = expect_valid(p.push(r#"{"items": [1, 2, 3]}"#));
        assert!(missing_required.is_empty());
        assert!(known_leaves["items"].is_array());
    }

    /// `reset` forgets members seen before it.
    #[test]
    fn reset_clears_buffer() {
        let mut p = PartialJsonParser::new();
        p.push(r#"{"x": 1}"#);
        p.reset();

        match p.push(r#"{"y": 2}"#) {
            PrefixState::ValidPrefix { known_leaves, .. } => assert!(
                !known_leaves.contains_key("x"),
                "should be cleared after reset"
            ),
            other => panic!("{other:?}"),
        }
    }

    /// Multi-byte UTF-8 text inside a string survives unchanged.
    #[test]
    fn fixture_unicode_filename() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["path".into()]);

        let (known_leaves, missing_required) =
            expect_valid(p.push(r#"{"path": "/tmp/Привет.txt"}"#));
        assert!(missing_required.is_empty());
        let v = known_leaves["path"].as_str().unwrap();
        assert!(v.contains("Привет"), "non-ASCII corrupted: {v}");
    }

    /// A member committed by an earlier push is still present after a later one.
    #[test]
    fn fixture_incremental_watermark() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["a".into(), "b".into()]);

        let s1 = p.push(r#"{"a": 1, "b": "#);
        // `Incomplete` is acceptable here; only `Malformed` is a failure.
        if let PrefixState::ValidPrefix {
            known_leaves,
            missing_required,
        } = &s1
        {
            assert!(known_leaves.contains_key("a"));
            assert!(missing_required.contains(&"b".to_string()));
        } else if let other @ PrefixState::Malformed = &s1 {
            panic!("unexpected s1: {other:?}");
        }

        let (known_leaves, missing_required) = expect_valid(p.push("2}"));
        assert!(
            missing_required.is_empty(),
            "still missing: {missing_required:?}"
        );
        assert_eq!(known_leaves["a"], 1);
        assert_eq!(known_leaves["b"], 2);
    }
}