#![allow(dead_code)]
use serde_json::Map;
/// Result of scanning the buffered prefix of a streamed JSON object.
#[derive(Debug, Clone, PartialEq)]
pub enum PrefixState {
    /// Not enough input to classify yet (e.g. only whitespace buffered).
    Incomplete,
    /// The buffer so far is a valid prefix of a top-level JSON object.
    ValidPrefix {
        /// Key/value pairs whose values have been fully parsed so far.
        known_leaves: Map<String, serde_json::Value>,
        /// Required keys (see `PartialJsonParser::set_required`) not yet seen.
        missing_required: Vec<String>,
    },
    /// The buffer can never become a valid top-level JSON object.
    Malformed,
}
/// Parser context states.
///
/// NOTE(review): this enum is not referenced anywhere in this file — it looks
/// like a leftover or a placeholder for a future state-machine rewrite, kept
/// compiling by the file-level `#![allow(dead_code)]`. Confirm before removal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Ctx {
    TopObject,
    InKey,
    AfterKey,
    InValue,
    InStringValue,
    InNestedValue { depth: u32 },
}
/// Incremental parser for a top-level JSON object that arrives as a stream
/// of string deltas (e.g. tool-call arguments from a streaming API).
pub struct PartialJsonParser {
    /// All deltas received so far, concatenated.
    buf: String,
    /// Keys that must appear before `missing_required` becomes empty.
    required: Vec<String>,
    /// Fully-parsed key/value pairs committed by previous scans.
    known_cache: Map<String, serde_json::Value>,
    /// Byte offset into `buf` up to which parsing has been committed.
    /// `0` doubles as a sentinel meaning "the opening '{' has not been
    /// consumed yet", so the next scan re-runs the opening-brace check.
    scan_watermark: usize,
}
impl PartialJsonParser {
    /// Creates an empty parser with no required keys.
    #[must_use]
    pub fn new() -> Self {
        Self {
            buf: String::new(),
            required: Vec::new(),
            known_cache: Map::new(),
            scan_watermark: 0,
        }
    }

    /// Sets the keys that must be present before `missing_required` is empty.
    pub fn set_required(&mut self, required: Vec<String>) {
        self.required = required;
    }

    /// Appends a streamed delta and re-scans the accumulated buffer,
    /// returning the current prefix state.
    pub fn push(&mut self, delta: &str) -> PrefixState {
        self.buf.push_str(delta);
        self.scan()
    }

    /// Clears the buffer and all cached scan progress.
    /// The `required` list is deliberately kept so the parser can be
    /// reused for another object of the same schema.
    pub fn reset(&mut self) {
        self.buf.clear();
        self.known_cache.clear();
        self.scan_watermark = 0;
    }

    /// Scans the buffer, resuming from the last committed watermark, and
    /// classifies the prefix. Complete key/value pairs are committed into
    /// `known_cache` so later scans do not re-parse them.
    fn scan(&mut self) -> PrefixState {
        let bytes = self.buf.as_bytes();
        let len = bytes.len();
        // First scan (watermark == 0): locate and consume the opening '{'.
        // Later scans resume right after the last committed value.
        let mut i = if self.scan_watermark == 0 {
            let start = skip_ws(bytes, 0);
            if start >= len || bytes[start] != b'{' {
                return if self.buf.trim().is_empty() {
                    PrefixState::Incomplete
                } else {
                    PrefixState::Malformed
                };
            }
            start + 1
        } else {
            self.scan_watermark
        };
        let mut known = self.known_cache.clone();
        loop {
            i = skip_ws(bytes, i);
            if i >= len {
                break;
            }
            if bytes[i] == b'}' {
                // Top-level object closed: commit everything and report.
                self.scan_watermark = i + 1;
                let missing = self.missing(&known);
                self.known_cache.clone_from(&known);
                return PrefixState::ValidPrefix {
                    known_leaves: known,
                    missing_required: missing,
                };
            }
            // Skip a separating comma between members (lenient: a comma is
            // not strictly enforced between pairs).
            if bytes[i] == b',' {
                i += 1;
                i = skip_ws(bytes, i);
                if i >= len {
                    break;
                }
            }
            if bytes[i] != b'"' {
                return PrefixState::Malformed;
            }
            let Some((key, after_key)) = read_string(bytes, i) else {
                // Key string not closed yet: wait for more input.
                break;
            };
            i = after_key;
            i = skip_ws(bytes, i);
            if i >= len {
                break;
            }
            if bytes[i] != b':' {
                return PrefixState::Malformed;
            }
            i += 1;
            i = skip_ws(bytes, i);
            if i >= len {
                break;
            }
            match read_value(bytes, i) {
                ReadValue::Complete(value, end) => {
                    // BUGFIX: a number that runs to the very end of the
                    // buffer is ambiguous — a later delta may extend it
                    // ("{\"x\": 4" + "2}" => 42). Report it tentatively but
                    // do NOT commit the watermark or cache; the next scan
                    // re-reads the pair from the last committed position.
                    // (Strings, literals and nested values end with an
                    // unambiguous terminator, so they are safe to commit.)
                    if end == len && value.is_number() {
                        known.insert(key, value);
                        break;
                    }
                    known.insert(key, value);
                    self.scan_watermark = end;
                    self.known_cache.clone_from(&known);
                    i = end;
                }
                ReadValue::Incomplete => break,
                ReadValue::Malformed => return PrefixState::Malformed,
            }
        }
        let missing = self.missing(&known);
        PrefixState::ValidPrefix {
            known_leaves: known,
            missing_required: missing,
        }
    }

    /// Required keys not yet present in `known`.
    fn missing(&self, known: &Map<String, serde_json::Value>) -> Vec<String> {
        self.required
            .iter()
            .filter(|k| !known.contains_key(k.as_str()))
            .cloned()
            .collect()
    }
}
impl Default for PartialJsonParser {
fn default() -> Self {
Self::new()
}
}
/// Returns the index of the first byte at or after `start` that is not JSON
/// whitespace (space, tab, CR, LF — deliberately *not* form feed), or the
/// slice length if only whitespace remains. Out-of-range `start` is returned
/// unchanged.
fn skip_ws(bytes: &[u8], start: usize) -> usize {
    let is_json_ws = |b: &&u8| matches!(**b, b' ' | b'\t' | b'\r' | b'\n');
    bytes
        .get(start..)
        .map_or(start, |rest| start + rest.iter().take_while(is_json_ws).count())
}
/// Reads a double-quoted JSON string starting at `start` (which must point at
/// the opening `"`). Returns the decoded string and the offset just past the
/// closing quote, or `None` if the closing quote has not arrived yet.
fn read_string(bytes: &[u8], start: usize) -> Option<(String, usize)> {
    debug_assert_eq!(bytes[start], b'"');
    let mut i = start + 1;
    let mut escape = false;
    while i < bytes.len() {
        let b = bytes[i];
        if escape {
            escape = false;
        } else if b == b'\\' {
            escape = true;
        } else if b == b'"' {
            let content = std::str::from_utf8(&bytes[start + 1..i]).ok()?;
            // PERF: the quoted form already exists verbatim in the buffer as
            // bytes[start..=i]; parse it in place instead of rebuilding it
            // with a concat() allocation. serde_json decodes the escapes
            // (\n, \", \uXXXX, ...); fall back to the raw content if the
            // escapes are not valid JSON (lenient best-effort).
            let decoded: String = serde_json::from_slice(&bytes[start..=i])
                .unwrap_or_else(|_| content.to_owned());
            return Some((decoded, i + 1));
        }
        i += 1;
    }
    None
}
/// Outcome of attempting to read a single JSON value from the buffer.
enum ReadValue {
    /// A fully-parsed value plus the byte offset just past it.
    Complete(serde_json::Value, usize),
    /// The buffer ends before the value does; wait for more input.
    Incomplete,
    /// These bytes can never form a valid JSON value.
    Malformed,
}
/// Reads one JSON value starting at `i`, dispatching on its first byte.
fn read_value(bytes: &[u8], i: usize) -> ReadValue {
    let Some(&first) = bytes.get(i) else {
        return ReadValue::Incomplete;
    };
    match first {
        b'"' => read_string(bytes, i).map_or(ReadValue::Incomplete, |(s, end)| {
            ReadValue::Complete(serde_json::Value::String(s), end)
        }),
        b'{' | b'[' => read_nested(bytes, i),
        b't' => read_literal(bytes, i, b"true", serde_json::Value::Bool(true)),
        b'f' => read_literal(bytes, i, b"false", serde_json::Value::Bool(false)),
        b'n' => read_literal(bytes, i, b"null", serde_json::Value::Null),
        b'-' | b'0'..=b'9' => read_number(bytes, i),
        _ => ReadValue::Malformed,
    }
}
/// Matches the fixed literal `lit` (`true` / `false` / `null`) at offset `i`.
/// Too few bytes left => `Incomplete`; a full-length mismatch => `Malformed`.
fn read_literal(bytes: &[u8], i: usize, lit: &[u8], val: serde_json::Value) -> ReadValue {
    match bytes.get(i..i + lit.len()) {
        Some(window) if window == lit => ReadValue::Complete(val, i + lit.len()),
        Some(_) => ReadValue::Malformed,
        None => ReadValue::Incomplete,
    }
}
/// Reads a JSON number starting at `i`.
///
/// Scans a permissive superset of the JSON number grammar (sign, digits,
/// dots, one exponent) and lets `serde_json` validate the exact syntax.
fn read_number(bytes: &[u8], mut i: usize) -> ReadValue {
    let start = i;
    if i < bytes.len() && bytes[i] == b'-' {
        i += 1;
    }
    while i < bytes.len() && (bytes[i].is_ascii_digit() || bytes[i] == b'.') {
        i += 1;
    }
    if i < bytes.len() && matches!(bytes[i], b'e' | b'E') {
        i += 1;
        if i < bytes.len() && matches!(bytes[i], b'+' | b'-') {
            i += 1;
        }
        while i < bytes.len() && bytes[i].is_ascii_digit() {
            i += 1;
        }
    }
    if i == start {
        return ReadValue::Malformed;
    }
    if i < bytes.len() && !matches!(bytes[i], b',' | b'}' | b']' | b' ' | b'\t' | b'\r' | b'\n') {
        // BUGFIX: a byte that can never follow a JSON number (e.g. "12x")
        // cannot be repaired by future deltas — previously this returned
        // Incomplete, leaving the parser stuck forever.
        return ReadValue::Malformed;
    }
    let s = std::str::from_utf8(&bytes[start..i]).unwrap_or("");
    match serde_json::from_str::<serde_json::Value>(s) {
        Ok(v) => ReadValue::Complete(v, i),
        // BUGFIX: an unparseable numeric prefix that runs to the end of the
        // buffer ("-", "1.", "1e", "1e+") may still be completed by the next
        // delta — previously this was reported as Malformed.
        Err(_) if i == bytes.len() => ReadValue::Incomplete,
        Err(_) => ReadValue::Malformed,
    }
}
/// Reads a nested object or array starting at `start` (which must point at
/// `{` or `[`). Finds the matching closer by depth-counting the opener's own
/// bracket kind while skipping string contents (valid JSON nests brackets
/// properly, so counting one kind suffices to find the end), then hands the
/// whole slice to `serde_json` for full validation.
fn read_nested(bytes: &[u8], start: usize) -> ReadValue {
    let open = bytes[start];
    let close = if open == b'{' { b'}' } else { b']' };
    let mut depth: u32 = 1;
    let mut in_string = false;
    let mut escape = false;
    for (pos, &b) in bytes.iter().enumerate().skip(start + 1) {
        if escape {
            escape = false;
            continue;
        }
        if in_string {
            match b {
                b'\\' => escape = true,
                b'"' => in_string = false,
                _ => {}
            }
            continue;
        }
        if b == b'"' {
            in_string = true;
        } else if b == open {
            depth += 1;
        } else if b == close {
            depth -= 1;
            if depth == 0 {
                let parsed = std::str::from_utf8(&bytes[start..=pos])
                    .ok()
                    .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok());
                return match parsed {
                    Some(v) => ReadValue::Complete(v, pos + 1),
                    None => ReadValue::Malformed,
                };
            }
        }
    }
    ReadValue::Incomplete
}
#[cfg(test)]
mod tests {
    use super::*;

    // Feeds deltas in order and returns the state after the last push.
    fn push_all(p: &mut PartialJsonParser, parts: &[&str]) -> PrefixState {
        let mut state = PrefixState::Incomplete;
        for part in parts {
            state = p.push(part);
        }
        state
    }

    // A string value split across two deltas must be reassembled intact.
    #[test]
    fn fixture_simple_command_two_deltas() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["command".into()]);
        p.push(r#"{"command": "ls "#);
        let state = p.push(r#"-la"}"#);
        match state {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(missing_required.is_empty());
                let v = known_leaves["command"].as_str().unwrap();
                assert!(v.contains("ls") && v.contains("la"), "got: {v}");
            }
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }

    // Two fields arriving in three deltas, each split mid-string.
    #[test]
    fn fixture_multi_field_incremental() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["path".into(), "content".into()]);
        let state = push_all(
            &mut p,
            &[
                r#"{"path": "/tmp/f"#,
                r#"oo.txt", "content": "hel"#,
                r#"lo world"}"#,
            ],
        );
        match state {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(missing_required.is_empty(), "missing: {missing_required:?}");
                assert!(known_leaves.contains_key("path"));
                assert!(known_leaves.contains_key("content"));
            }
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }

    // Escaped quotes inside a string value must not terminate it early.
    #[test]
    fn fixture_escape_in_string() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["msg".into()]);
        let state = p.push(r#"{"msg": "say \"hello\""}"#);
        match state {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(missing_required.is_empty());
                let v = known_leaves["msg"].as_str().unwrap();
                assert!(v.contains("hello"), "got: {v}");
            }
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }

    // Mid-stream, a number at the end of the buffer may either already be
    // reported (tentatively) or held back as Incomplete — both are accepted.
    #[test]
    fn fixture_incomplete_then_resolved() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["x".into()]);
        let mid = p.push(r#"{"x": 42"#);
        match &mid {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(missing_required.is_empty());
                assert_eq!(known_leaves["x"], 42);
            }
            PrefixState::Incomplete => {}
            other @ PrefixState::Malformed => panic!("unexpected: {other:?}"),
        }
        let done = p.push("}");
        assert!(matches!(done, PrefixState::ValidPrefix { .. }));
    }

    // Input that cannot start a JSON object is rejected immediately.
    #[test]
    fn fixture_malformed_input() {
        let mut p = PartialJsonParser::new();
        let state = p.push("not-json");
        assert!(matches!(state, PrefixState::Malformed));
    }

    // Array values are parsed as a single nested leaf.
    #[test]
    fn fixture_top_level_array_value() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["items".into()]);
        let state = p.push(r#"{"items": [1, 2, 3]}"#);
        match state {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(missing_required.is_empty());
                assert!(known_leaves["items"].is_array());
            }
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }

    // reset() must drop both the buffer and the cached leaves.
    #[test]
    fn reset_clears_buffer() {
        let mut p = PartialJsonParser::new();
        p.push(r#"{"x": 1}"#);
        p.reset();
        let state = p.push(r#"{"y": 2}"#);
        match state {
            PrefixState::ValidPrefix { known_leaves, .. } => {
                assert!(
                    !known_leaves.contains_key("x"),
                    "should be cleared after reset"
                );
            }
            other => panic!("{other:?}"),
        }
    }

    // Multi-byte UTF-8 content must survive the byte-level scan intact.
    #[test]
    fn fixture_unicode_filename() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["path".into()]);
        let state = p.push(r#"{"path": "/tmp/Привет.txt"}"#);
        match state {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(missing_required.is_empty());
                let v = known_leaves["path"].as_str().unwrap();
                assert!(v.contains("Привет"), "non-ASCII corrupted: {v}");
            }
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }

    // A committed first pair plus a pending second pair: the first scan may
    // report {a} with "b" missing, or just Incomplete; the second delta must
    // resolve both values.
    #[test]
    fn fixture_incremental_watermark() {
        let mut p = PartialJsonParser::new();
        p.set_required(vec!["a".into(), "b".into()]);
        let s1 = p.push(r#"{"a": 1, "b": "#);
        match &s1 {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(known_leaves.contains_key("a"));
                assert!(missing_required.contains(&"b".to_string()));
            }
            PrefixState::Incomplete => {}
            other @ PrefixState::Malformed => panic!("unexpected s1: {other:?}"),
        }
        let s2 = p.push("2}");
        match s2 {
            PrefixState::ValidPrefix {
                known_leaves,
                missing_required,
            } => {
                assert!(
                    missing_required.is_empty(),
                    "still missing: {missing_required:?}"
                );
                assert_eq!(known_leaves["a"], 1);
                assert_eq!(known_leaves["b"], 2);
            }
            other => panic!("expected ValidPrefix, got {other:?}"),
        }
    }
}