use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use crate::error::{IoError, Result};
/// A parsed JSON document node.
///
/// Objects are kept as a list of `(key, value)` pairs instead of a map, so the pairs
/// retain the order in which they were stored and duplicate keys are not merged.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    /// The literal `null`.
    Null,
    /// The literals `true` / `false`.
    Bool(bool),
    /// Any JSON number, represented as an `f64`.
    Number(f64),
    /// A JSON string.
    String(String),
    /// An ordered list of values.
    Array(Vec<JsonValue>),
    /// An ordered list of `(key, value)` members.
    Object(Vec<(std::string::String, JsonValue)>),
}

impl JsonValue {
    /// Borrows the text if this is a `String`; `None` for every other variant.
    pub fn as_str(&self) -> Option<&str> {
        if let Self::String(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Returns the number if this is a `Number`; `None` for every other variant.
    pub fn as_f64(&self) -> Option<f64> {
        if let Self::Number(number) = self {
            Some(*number)
        } else {
            None
        }
    }

    /// Returns the flag if this is a `Bool`; `None` for every other variant.
    pub fn as_bool(&self) -> Option<bool> {
        if let Self::Bool(flag) = self {
            Some(*flag)
        } else {
            None
        }
    }

    /// `true` only for the `Null` variant.
    pub fn is_null(&self) -> bool {
        self == &Self::Null
    }

    /// Borrows the elements if this is an `Array`; `None` for every other variant.
    pub fn as_array(&self) -> Option<&[JsonValue]> {
        if let Self::Array(elements) = self {
            Some(elements.as_slice())
        } else {
            None
        }
    }

    /// Borrows the `(key, value)` members if this is an `Object`; `None` otherwise.
    pub fn as_object(&self) -> Option<&[(std::string::String, JsonValue)]> {
        if let Self::Object(members) = self {
            Some(members.as_slice())
        } else {
            None
        }
    }

    /// Looks up `key` in an object and returns the value of the first member with that key.
    ///
    /// Returns `None` when the key is absent or when `self` is not an `Object`.
    pub fn get(&self, key: &str) -> Option<&JsonValue> {
        self.as_object()?
            .iter()
            .find_map(|(name, value)| if name == key { Some(value) } else { None })
    }
}
struct Parser<'src> {
src: &'src [u8],
pos: usize,
}
impl<'src> Parser<'src> {
fn new(src: &'src str) -> Self {
Self {
src: src.as_bytes(),
pos: 0,
}
}
fn remaining(&self) -> usize {
self.src.len().saturating_sub(self.pos)
}
fn peek(&self) -> Option<u8> {
self.src.get(self.pos).copied()
}
fn consume(&mut self) -> Option<u8> {
let ch = self.src.get(self.pos).copied()?;
self.pos += 1;
Some(ch)
}
fn expect_byte(&mut self, expected: u8) -> Result<()> {
match self.peek() {
Some(b) if b == expected => {
self.pos += 1;
Ok(())
}
got => Err(IoError::ParseError(format!(
"expected '{}' at offset {} but got {:?}",
expected as char,
self.pos,
got.map(|b| b as char)
))),
}
}
fn skip_whitespace(&mut self) {
while let Some(b) = self.peek() {
if b == b' ' || b == b'\t' || b == b'\r' || b == b'\n' {
self.pos += 1;
} else {
break;
}
}
}
fn parse_value(&mut self) -> Result<JsonValue> {
self.skip_whitespace();
match self.peek().ok_or_else(|| IoError::ParseError("unexpected end of input".to_string()))? {
b'n' => self.parse_null(),
b't' | b'f' => self.parse_bool(),
b'"' => self.parse_string().map(JsonValue::String),
b'[' => self.parse_array(),
b'{' => self.parse_object(),
b'-' | b'0'..=b'9' => self.parse_number(),
other => Err(IoError::ParseError(format!(
"unexpected character '{}' at offset {}",
other as char, self.pos
))),
}
}
fn parse_null(&mut self) -> Result<JsonValue> {
self.expect_literal(b"null")?;
Ok(JsonValue::Null)
}
fn parse_bool(&mut self) -> Result<JsonValue> {
match self.peek() {
Some(b't') => {
self.expect_literal(b"true")?;
Ok(JsonValue::Bool(true))
}
Some(b'f') => {
self.expect_literal(b"false")?;
Ok(JsonValue::Bool(false))
}
_ => Err(IoError::ParseError(format!(
"expected 'true' or 'false' at offset {}",
self.pos
))),
}
}
fn expect_literal(&mut self, lit: &[u8]) -> Result<()> {
if self.remaining() < lit.len() {
return Err(IoError::ParseError(format!(
"expected '{}' at offset {} but input is too short",
std::str::from_utf8(lit).unwrap_or("<invalid>"),
self.pos
)));
}
if &self.src[self.pos..self.pos + lit.len()] != lit {
return Err(IoError::ParseError(format!(
"expected '{}' at offset {}",
std::str::from_utf8(lit).unwrap_or("<invalid>"),
self.pos
)));
}
self.pos += lit.len();
Ok(())
}
fn parse_number(&mut self) -> Result<JsonValue> {
let start = self.pos;
if self.peek() == Some(b'-') {
self.pos += 1;
}
match self.peek() {
Some(b'0') => {
self.pos += 1;
}
Some(b'1'..=b'9') => {
while matches!(self.peek(), Some(b'0'..=b'9')) {
self.pos += 1;
}
}
_ => {
return Err(IoError::ParseError(format!(
"invalid number at offset {start}"
)))
}
}
if self.peek() == Some(b'.') {
self.pos += 1;
if !matches!(self.peek(), Some(b'0'..=b'9')) {
return Err(IoError::ParseError(format!(
"expected digit after '.' at offset {}",
self.pos
)));
}
while matches!(self.peek(), Some(b'0'..=b'9')) {
self.pos += 1;
}
}
if matches!(self.peek(), Some(b'e') | Some(b'E')) {
self.pos += 1;
if matches!(self.peek(), Some(b'+') | Some(b'-')) {
self.pos += 1;
}
if !matches!(self.peek(), Some(b'0'..=b'9')) {
return Err(IoError::ParseError(format!(
"expected digit in exponent at offset {}",
self.pos
)));
}
while matches!(self.peek(), Some(b'0'..=b'9')) {
self.pos += 1;
}
}
let num_str = std::str::from_utf8(&self.src[start..self.pos])
.map_err(|e| IoError::ParseError(format!("number UTF-8 error: {e}")))?;
let n: f64 = num_str
.parse()
.map_err(|e| IoError::ParseError(format!("number parse error '{num_str}': {e}")))?;
Ok(JsonValue::Number(n))
}
fn parse_string(&mut self) -> Result<std::string::String> {
self.expect_byte(b'"')?;
let mut s = std::string::String::new();
loop {
let ch = self.consume().ok_or_else(|| {
IoError::ParseError("unterminated string".to_string())
})?;
match ch {
b'"' => break,
b'\\' => {
let esc = self.consume().ok_or_else(|| {
IoError::ParseError("unterminated escape sequence".to_string())
})?;
match esc {
b'"' => s.push('"'),
b'\\' => s.push('\\'),
b'/' => s.push('/'),
b'b' => s.push('\x08'),
b'f' => s.push('\x0C'),
b'n' => s.push('\n'),
b'r' => s.push('\r'),
b't' => s.push('\t'),
b'u' => {
let code_point = self.parse_unicode_escape()?;
s.push(code_point);
}
other => {
return Err(IoError::ParseError(format!(
"unknown escape '\\{}' at offset {}",
other as char,
self.pos
)))
}
}
}
b => {
let mut buf = vec![b];
let extra = if b & 0b1111_1000 == 0b1111_0000 {
3
} else if b & 0b1111_0000 == 0b1110_0000 {
2
} else if b & 0b1110_0000 == 0b1100_0000 {
1
} else {
0
};
for _ in 0..extra {
let cont = self.consume().ok_or_else(|| {
IoError::ParseError("truncated UTF-8 sequence in string".to_string())
})?;
buf.push(cont);
}
let decoded = std::str::from_utf8(&buf).map_err(|e| {
IoError::ParseError(format!("invalid UTF-8 in string: {e}"))
})?;
s.push_str(decoded);
}
}
}
Ok(s)
}
fn parse_unicode_escape(&mut self) -> Result<char> {
if self.remaining() < 4 {
return Err(IoError::ParseError(
"\\u escape requires 4 hex digits".to_string(),
));
}
let hex_bytes = &self.src[self.pos..self.pos + 4];
let hex_str = std::str::from_utf8(hex_bytes)
.map_err(|e| IoError::ParseError(format!("non-UTF8 in \\u escape: {e}")))?;
let code = u32::from_str_radix(hex_str, 16)
.map_err(|e| IoError::ParseError(format!("invalid \\u{hex_str}: {e}")))?;
self.pos += 4;
char::from_u32(code).ok_or_else(|| {
IoError::ParseError(format!("code point U+{code:04X} is not a valid char"))
})
}
fn parse_array(&mut self) -> Result<JsonValue> {
self.expect_byte(b'[')?;
let mut items = Vec::new();
self.skip_whitespace();
if self.peek() == Some(b']') {
self.pos += 1;
return Ok(JsonValue::Array(items));
}
loop {
let val = self.parse_value()?;
items.push(val);
self.skip_whitespace();
match self.peek() {
Some(b',') => {
self.pos += 1;
}
Some(b']') => {
self.pos += 1;
break;
}
other => {
return Err(IoError::ParseError(format!(
"expected ',' or ']' in array at offset {}, got {:?}",
self.pos,
other.map(|b| b as char)
)))
}
}
}
Ok(JsonValue::Array(items))
}
fn parse_object(&mut self) -> Result<JsonValue> {
self.expect_byte(b'{')?;
let mut pairs: Vec<(std::string::String, JsonValue)> = Vec::new();
self.skip_whitespace();
if self.peek() == Some(b'}') {
self.pos += 1;
return Ok(JsonValue::Object(pairs));
}
loop {
self.skip_whitespace();
let key = self.parse_string()?;
self.skip_whitespace();
self.expect_byte(b':')?;
let val = self.parse_value()?;
pairs.push((key, val));
self.skip_whitespace();
match self.peek() {
Some(b',') => {
self.pos += 1;
}
Some(b'}') => {
self.pos += 1;
break;
}
other => {
return Err(IoError::ParseError(format!(
"expected ',' or '}}' in object at offset {}, got {:?}",
self.pos,
other.map(|b| b as char)
)))
}
}
}
Ok(JsonValue::Object(pairs))
}
fn ensure_consumed(&mut self) -> Result<()> {
self.skip_whitespace();
if self.pos != self.src.len() {
Err(IoError::ParseError(format!(
"trailing garbage at offset {} (len {})",
self.pos,
self.src.len()
)))
} else {
Ok(())
}
}
}
/// Parses `s` as exactly one JSON value.
///
/// Whitespace around the value is allowed; anything else left over after the value is
/// reported as an error.
pub fn parse_json(s: &str) -> Result<JsonValue> {
    let mut parser = Parser::new(s);
    let parsed = parser.parse_value()?;
    parser.ensure_consumed().map(|()| parsed)
}
/// Resolves a dot-separated `path` against `value`.
///
/// Each segment is looked up as a key when the current node is an object (first matching
/// member wins) or parsed as a `usize` index when it is an array. An empty path returns
/// `value` itself. Returns `None` as soon as a segment cannot be resolved or the current
/// node is a scalar.
pub fn extract_field<'a>(value: &'a JsonValue, path: &str) -> Option<&'a JsonValue> {
    let mut node = value;
    let mut remaining = path;
    // Peel one segment per iteration; an empty remainder means the walk is complete.
    while !remaining.is_empty() {
        let (segment, tail) = remaining.split_once('.').unwrap_or((remaining, ""));
        node = match node {
            JsonValue::Object(pairs) => {
                let (_, child) = pairs.iter().find(|(name, _)| name == segment)?;
                child
            }
            JsonValue::Array(items) => {
                let index = segment.parse::<usize>().ok()?;
                items.get(index)?
            }
            _ => return None,
        };
        remaining = tail;
    }
    Some(node)
}
/// Flattens `value` into a map from dotted paths to string-rendered leaf values.
///
/// `prefix` is the path of `value` itself; pass `""` to get paths relative to the root.
/// Array elements use their index as the path segment and object members use their key.
/// Empty arrays and empty objects contribute no entries.
pub fn flatten_json(value: &JsonValue, prefix: &str) -> HashMap<std::string::String, std::string::String> {
    let mut flattened = HashMap::new();
    flatten_json_into(value, prefix, &mut flattened);
    flattened
}

/// Recursive worker for [`flatten_json`]: writes every leaf below `value` into `map`.
///
/// Leaves are rendered as `null`, `true`/`false`, the raw string contents, or, for numbers,
/// fixed notation with 15 decimals from which trailing zeros and a trailing `.` are removed.
fn flatten_json_into(
    value: &JsonValue,
    prefix: &str,
    map: &mut HashMap<std::string::String, std::string::String>,
) {
    // Builds the path of a child: the bare segment at the root, `prefix.segment` otherwise.
    let child_key = |segment: &str| -> std::string::String {
        if prefix.is_empty() {
            segment.to_string()
        } else {
            format!("{prefix}.{segment}")
        }
    };

    let rendered = match value {
        // Containers recurse and return; they never produce an entry of their own.
        JsonValue::Array(items) => {
            for (idx, item) in items.iter().enumerate() {
                flatten_json_into(item, &child_key(&idx.to_string()), map);
            }
            return;
        }
        JsonValue::Object(pairs) => {
            for (key, child) in pairs {
                flatten_json_into(child, &child_key(key), map);
            }
            return;
        }
        JsonValue::Null => "null".to_string(),
        JsonValue::Bool(flag) => flag.to_string(),
        JsonValue::Number(n) => {
            let fixed = format!("{n:.15}");
            fixed
                .trim_end_matches('0')
                .trim_end_matches('.')
                .to_string()
        }
        JsonValue::String(text) => text.clone(),
    };
    map.insert(prefix.to_string(), rendered);
}
/// Reads a file that holds one JSON document per line.
pub struct JsonLinesReader {
    /// Buffered handle on the opened file.
    inner: BufReader<File>,
    /// Count of lines successfully read so far, including skipped ones.
    line_number: u64,
    /// Set once end of file has been reached.
    finished: bool,
}

impl JsonLinesReader {
    /// Opens `path` for reading.
    ///
    /// # Errors
    /// Any failure to open the file is reported as `IoError::FileNotFound`, with the path
    /// and the underlying error in the message.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path = path.as_ref();
        match File::open(path) {
            Ok(file) => Ok(Self {
                inner: BufReader::new(file),
                line_number: 0,
                finished: false,
            }),
            Err(e) => Err(IoError::FileNotFound(format!("{}: {e}", path.display()))),
        }
    }

    /// Returns the next record, or `Ok(None)` once the end of the file has been reached.
    ///
    /// Lines that are empty after trimming, or whose trimmed text starts with `#`, are
    /// skipped.
    ///
    /// # Errors
    /// A read failure yields `IoError::FileError`; a line that is not valid JSON yields
    /// `IoError::ParseError`. Both messages are prefixed with the line number.
    pub fn next_record(&mut self) -> Result<Option<JsonValue>> {
        let mut buf = std::string::String::new();
        while !self.finished {
            buf.clear();
            let bytes_read = self
                .inner
                .read_line(&mut buf)
                .map_err(|e| IoError::FileError(format!("line {}: {e}", self.line_number + 1)))?;
            if bytes_read == 0 {
                // Zero bytes means end of file; remember it so later calls return at once.
                self.finished = true;
                break;
            }
            self.line_number += 1;
            let record = buf.trim();
            if record.is_empty() || record.starts_with('#') {
                continue;
            }
            return match parse_json(record) {
                Ok(value) => Ok(Some(value)),
                Err(e) => Err(IoError::ParseError(format!(
                    "line {}: {e}",
                    self.line_number
                ))),
            };
        }
        Ok(None)
    }

    /// Reads every remaining record into a vector, stopping at the first error.
    pub fn collect_all(&mut self) -> Result<Vec<JsonValue>> {
        let mut records = Vec::new();
        loop {
            match self.next_record()? {
                Some(record) => records.push(record),
                None => return Ok(records),
            }
        }
    }

    /// Number of lines read so far (blank and comment lines included).
    pub fn line_number(&self) -> u64 {
        self.line_number
    }
}
/// Iterator adapter over [`JsonLinesReader::next_record`].
///
/// Yields `Ok(value)` for each record and `Err(..)` for each failing line, and ends when
/// `next_record` reports the end of the file.
impl Iterator for JsonLinesReader {
    type Item = Result<JsonValue>;

    fn next(&mut self) -> Option<Self::Item> {
        // `Result<Option<T>>` -> `Option<Result<T>>`: `Ok(None)` becomes `None`.
        self.next_record().transpose()
    }
}
// Unit tests for the JSON parser, the path/flatten helpers and the JSON Lines reader.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
// Writes `content` to a file called `name` inside a fixed directory under the system temp
// directory and returns its path. Each test passes a distinct file name.
fn write_temp(name: &str, content: &str) -> std::path::PathBuf {
let dir = std::env::temp_dir().join("scirs2_streaming_json_tests");
std::fs::create_dir_all(&dir).expect("mkdir");
let p = dir.join(name);
let mut f = File::create(&p).expect("create");
f.write_all(content.as_bytes()).expect("write");
p
}
// ---- parse_json: literals and numbers ----
#[test]
fn test_parse_null() {
assert_eq!(parse_json("null").expect("parse"), JsonValue::Null);
}
#[test]
fn test_parse_bool_true() {
assert_eq!(parse_json("true").expect("parse"), JsonValue::Bool(true));
}
#[test]
fn test_parse_bool_false() {
assert_eq!(parse_json("false").expect("parse"), JsonValue::Bool(false));
}
#[test]
fn test_parse_integer() {
let v = parse_json("42").expect("parse");
assert!(matches!(v, JsonValue::Number(n) if (n - 42.0).abs() < 1e-10));
}
#[test]
fn test_parse_negative_number() {
let v = parse_json("-7").expect("parse");
assert!(matches!(v, JsonValue::Number(n) if (n - (-7.0)).abs() < 1e-10));
}
#[test]
fn test_parse_float() {
let v = parse_json("3.14159").expect("parse");
assert!(matches!(v, JsonValue::Number(n) if (n - 3.14159).abs() < 1e-5));
}
#[test]
fn test_parse_exponent() {
let v = parse_json("1.5e3").expect("parse");
assert!(matches!(v, JsonValue::Number(n) if (n - 1500.0).abs() < 1e-10));
}
// ---- parse_json: strings ----
#[test]
fn test_parse_string_simple() {
let v = parse_json(r#""hello""#).expect("parse");
assert!(matches!(v, JsonValue::String(ref s) if s == "hello"));
}
#[test]
fn test_parse_string_escape_sequences() {
let v = parse_json(r#""line1\nline2\ttab""#).expect("parse");
assert!(matches!(v, JsonValue::String(ref s) if s == "line1\nline2\ttab"));
}
#[test]
fn test_parse_string_unicode_escape() {
let v = parse_json(r#""\u0041""#).expect("parse"); assert!(matches!(v, JsonValue::String(ref s) if s == "A"));
}
#[test]
fn test_parse_empty_string() {
let v = parse_json(r#""""#).expect("parse");
assert!(matches!(v, JsonValue::String(ref s) if s.is_empty()));
}
// ---- parse_json: arrays and objects ----
#[test]
fn test_parse_empty_array() {
let v = parse_json("[]").expect("parse");
assert!(matches!(v, JsonValue::Array(ref a) if a.is_empty()));
}
#[test]
fn test_parse_array_of_numbers() {
let v = parse_json("[1, 2, 3]").expect("parse");
if let JsonValue::Array(a) = v {
assert_eq!(a.len(), 3);
assert!(matches!(a[0], JsonValue::Number(n) if (n - 1.0).abs() < 1e-10));
} else {
panic!("expected array");
}
}
#[test]
fn test_parse_nested_array() {
let v = parse_json("[[1, 2], [3, 4]]").expect("parse");
if let JsonValue::Array(outer) = v {
assert_eq!(outer.len(), 2);
assert!(matches!(&outer[0], JsonValue::Array(inner) if inner.len() == 2));
} else {
panic!("expected array");
}
}
#[test]
fn test_parse_empty_object() {
let v = parse_json("{}").expect("parse");
assert!(matches!(v, JsonValue::Object(ref p) if p.is_empty()));
}
#[test]
fn test_parse_simple_object() {
let v = parse_json(r#"{"name":"Alice","age":30}"#).expect("parse");
let name = v.get("name").expect("name");
assert!(matches!(name, JsonValue::String(s) if s == "Alice"));
let age = v.get("age").expect("age");
assert!(matches!(age, JsonValue::Number(n) if (n - 30.0).abs() < 1e-10));
}
#[test]
fn test_parse_nested_object() {
let v = parse_json(r#"{"a":{"b":{"c":99}}}"#).expect("parse");
let leaf = v.get("a").and_then(|a| a.get("b")).and_then(|b| b.get("c"));
assert!(matches!(leaf, Some(JsonValue::Number(n)) if (n - 99.0).abs() < 1e-10));
}
#[test]
fn test_parse_object_with_array_value() {
let v = parse_json(r#"{"items":[1,2,3]}"#).expect("parse");
let items = v.get("items").expect("items");
assert!(matches!(items, JsonValue::Array(a) if a.len() == 3));
}
// ---- parse_json: malformed input must be rejected ----
#[test]
fn test_parse_trailing_garbage_is_error() {
assert!(parse_json("42 abc").is_err());
}
#[test]
fn test_parse_unterminated_string_is_error() {
assert!(parse_json(r#""hello"#).is_err());
}
#[test]
fn test_parse_missing_comma_in_array_is_error() {
assert!(parse_json("[1 2]").is_err());
}
#[test]
fn test_parse_empty_input_is_error() {
assert!(parse_json("").is_err());
}
// ---- extract_field: dotted path lookup ----
#[test]
fn test_extract_top_level_field() {
let v = parse_json(r#"{"x":10}"#).expect("parse");
let f = extract_field(&v, "x").expect("field");
assert!(matches!(f, JsonValue::Number(n) if (n - 10.0).abs() < 1e-10));
}
#[test]
fn test_extract_nested_field() {
let v = parse_json(r#"{"a":{"b":{"c":"deep"}}}"#).expect("parse");
let f = extract_field(&v, "a.b.c").expect("field");
assert!(matches!(f, JsonValue::String(s) if s == "deep"));
}
#[test]
fn test_extract_array_index() {
let v = parse_json(r#"{"arr":[10,20,30]}"#).expect("parse");
let f = extract_field(&v, "arr.2").expect("field");
assert!(matches!(f, JsonValue::Number(n) if (n - 30.0).abs() < 1e-10));
}
#[test]
fn test_extract_missing_field_returns_none() {
let v = parse_json(r#"{"x":1}"#).expect("parse");
assert!(extract_field(&v, "y").is_none());
}
#[test]
fn test_extract_empty_path_returns_self() {
let v = parse_json("42").expect("parse");
let f = extract_field(&v, "").expect("self");
assert!(matches!(f, JsonValue::Number(n) if (n - 42.0).abs() < 1e-10));
}
// ---- flatten_json: dotted keys mapped to stringified leaves ----
#[test]
fn test_flatten_flat_object() {
let v = parse_json(r#"{"a":"x","b":1}"#).expect("parse");
let flat = flatten_json(&v, "");
assert_eq!(flat["a"], "x");
assert_eq!(flat["b"], "1");
}
#[test]
fn test_flatten_nested_object() {
let v = parse_json(r#"{"user":{"name":"Bob","age":25}}"#).expect("parse");
let flat = flatten_json(&v, "");
assert_eq!(flat["user.name"], "Bob");
assert_eq!(flat["user.age"], "25");
}
#[test]
fn test_flatten_array() {
let v = parse_json(r#"{"tags":["a","b","c"]}"#).expect("parse");
let flat = flatten_json(&v, "");
assert_eq!(flat["tags.0"], "a");
assert_eq!(flat["tags.1"], "b");
assert_eq!(flat["tags.2"], "c");
}
#[test]
fn test_flatten_with_prefix() {
let v = parse_json(r#"{"x":1}"#).expect("parse");
let flat = flatten_json(&v, "root");
assert_eq!(flat["root.x"], "1");
}
#[test]
fn test_flatten_null_and_bool() {
let v = parse_json(r#"{"active":true,"data":null}"#).expect("parse");
let flat = flatten_json(&v, "");
assert_eq!(flat["active"], "true");
assert_eq!(flat["data"], "null");
}
// ---- JsonLinesReader: file-backed record streaming ----
#[test]
fn test_jsonlines_reader_basic() {
let content = "{\"id\":1}\n{\"id\":2}\n{\"id\":3}\n";
let path = write_temp("basic.ndjson", content);
let mut r = JsonLinesReader::open(&path).expect("open");
let all = r.collect_all().expect("collect");
assert_eq!(all.len(), 3);
assert!(matches!(all[2].get("id"), Some(JsonValue::Number(n)) if (n - 3.0).abs() < 1e-10));
}
#[test]
fn test_jsonlines_reader_skips_blank_and_comment() {
let content = "\n# comment\n{\"v\":1}\n\n{\"v\":2}\n";
let path = write_temp("comments.ndjson", content);
let mut r = JsonLinesReader::open(&path).expect("open");
let all = r.collect_all().expect("collect");
assert_eq!(all.len(), 2);
}
#[test]
fn test_jsonlines_reader_iterator_interface() {
let content = "{\"x\":10}\n{\"x\":20}\n";
let path = write_temp("iter.ndjson", content);
let r = JsonLinesReader::open(&path).expect("open");
let vals: Vec<_> = r.map(|res| res.expect("ok")).collect();
assert_eq!(vals.len(), 2);
assert!(matches!(vals[0].get("x"), Some(JsonValue::Number(n)) if (n - 10.0).abs() < 1e-10));
}
#[test]
fn test_jsonlines_reader_empty_file() {
let path = write_temp("empty.ndjson", "");
let mut r = JsonLinesReader::open(&path).expect("open");
assert!(r.next_record().expect("ok").is_none());
}
#[test]
fn test_jsonlines_reader_line_number_tracking() {
// The leading blank line is counted, so the first record sits on line 2.
let content = "\n{\"a\":1}\n{\"a\":2}\n";
let path = write_temp("lineno.ndjson", content);
let mut r = JsonLinesReader::open(&path).expect("open");
r.next_record().expect("ok").expect("some");
assert!(r.line_number() >= 2);
}
#[test]
fn test_jsonlines_reader_parse_error_propagates() {
let content = "{\"ok\":1}\n{bad json}\n";
let path = write_temp("bad.ndjson", content);
let mut r = JsonLinesReader::open(&path).expect("open");
let first = r.next_record().expect("first ok").expect("some");
assert!(first.get("ok").is_some());
assert!(r.next_record().is_err());
}
// ---- JsonValue accessors ----
#[test]
fn test_jsonvalue_accessors() {
assert_eq!(JsonValue::Null.as_str(), None);
assert_eq!(JsonValue::String("hi".to_string()).as_str(), Some("hi"));
assert!((JsonValue::Number(2.5).as_f64().expect("f64") - 2.5).abs() < 1e-10);
assert_eq!(JsonValue::Bool(true).as_bool(), Some(true));
assert!(JsonValue::Null.is_null());
let arr = JsonValue::Array(vec![JsonValue::Null]);
assert_eq!(arr.as_array().expect("arr").len(), 1);
}
}