use std::collections::{BTreeMap, BTreeSet};
use geonative_core::{FieldDef, Value, ValueType};
use serde_json::{Map as JsonMap, Value as Json};
/// Infers one [`FieldDef`] per distinct property key across `all_props`.
///
/// Each element is one row's property map (`None` when the row carries no
/// properties at all). Rows are fed, in slice order, to a fresh
/// [`FieldsAccumulator`], and the accumulator's finalized schema is returned.
pub fn infer_fields(all_props: &[Option<&JsonMap<String, Json>>]) -> Vec<FieldDef> {
    all_props
        .iter()
        .copied()
        .fold(FieldsAccumulator::new(), |mut schema, row| {
            schema.observe(row);
            schema
        })
        .finalize()
}
/// Streaming schema inference over a sequence of JSON property maps.
///
/// Feed every row to [`FieldsAccumulator::observe`], then call
/// [`FieldsAccumulator::finalize`] to obtain one [`FieldDef`] per distinct
/// key, in the order the keys were first encountered.
#[derive(Debug, Default)]
pub struct FieldsAccumulator {
    /// Keys in first-seen order; drives the output order of `finalize`.
    order: Vec<String>,
    /// Membership set mirroring `order`, used to detect a key's first sighting.
    seen: BTreeSet<String>,
    /// Per-key tally of observed value kinds and nullability.
    observations: BTreeMap<String, KeyObs>,
    /// Whether at least one row (including a `None` row) has been observed.
    saw_row: bool,
}

impl FieldsAccumulator {
    /// Creates an accumulator that has observed no rows.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one row of properties.
    ///
    /// * `None` (a row without any property map) marks every key known so far
    ///   as nullable.
    /// * `Some(map)` records the value kind of every entry, and marks as
    ///   nullable every known key that is absent from `map`.
    ///
    /// A key that shows up for the first time after earlier rows have already
    /// been observed is marked nullable as well: those earlier rows had no
    /// value for it.
    pub fn observe(&mut self, props: Option<&JsonMap<String, Json>>) {
        // Read the flag before setting it so the very first row is not treated
        // as having predecessors.
        let had_prior_rows = std::mem::replace(&mut self.saw_row, true);

        let Some(map) = props else {
            // Every key in `order` has an entry in `observations`, so sweeping
            // the map covers all known keys without cloning their names.
            for obs in self.observations.values_mut() {
                obs.nullable = true;
            }
            return;
        };

        let mut present: BTreeSet<&str> = BTreeSet::new();
        for (key, value) in map.iter() {
            let first_sighting = self.seen.insert(key.clone());
            if first_sighting {
                self.order.push(key.clone());
            }
            let obs = self.observations.entry(key.clone()).or_default();
            if first_sighting && had_prior_rows {
                // The key was missing from every earlier row.
                obs.nullable = true;
            }
            obs.observe(value);
            present.insert(key.as_str());
        }

        // Known keys that this row did not supply are missing here.
        for (name, obs) in &mut self.observations {
            if !present.contains(name.as_str()) {
                obs.nullable = true;
            }
        }
    }

    /// Consumes the accumulator and returns the inferred fields in first-seen
    /// key order, each with its resolved type and nullability.
    pub fn finalize(self) -> Vec<FieldDef> {
        let Self {
            order,
            mut observations,
            ..
        } = self;
        order
            .into_iter()
            .map(|name| {
                let obs = observations.remove(&name).unwrap_or_default();
                FieldDef::new(name, obs.resolve_type(), obs.nullable)
            })
            .collect()
    }
}
/// Tally of the JSON value kinds observed for a single property key.
#[derive(Debug, Default)]
struct KeyObs {
    saw_bool: bool,
    saw_int: bool,
    saw_float: bool,
    saw_string: bool,
    /// An array or object was observed.
    saw_composite: bool,
    /// A JSON `null` was observed, or the owner flagged the key as missing.
    nullable: bool,
}

impl KeyObs {
    /// Records the kind of one observed value.
    fn observe(&mut self, v: &Json) {
        match v {
            Json::Null => self.nullable = true,
            Json::Bool(_) => self.saw_bool = true,
            Json::Number(n) => {
                // Only numbers representable as `i64` count as integers. An
                // unsigned value above `i64::MAX` cannot be read back through
                // `as_i64`, so classifying it as an int would infer `Int64`
                // for a column whose values then fail to convert. Treat it as
                // a float instead, alongside genuine fractional numbers.
                if n.is_i64() {
                    self.saw_int = true;
                } else {
                    self.saw_float = true;
                }
            }
            Json::String(_) => self.saw_string = true,
            Json::Array(_) | Json::Object(_) => self.saw_composite = true,
        }
    }

    /// Picks the narrowest [`ValueType`] able to hold every observed value.
    ///
    /// * Any composite or string value forces `String`.
    /// * Booleans mixed with numbers fall back to `String`.
    /// * Integers mixed with floats widen to `Float64`.
    /// * A key with no typed observation (only nulls, or nothing at all)
    ///   defaults to `String`.
    fn resolve_type(&self) -> ValueType {
        if self.saw_composite || self.saw_string {
            return ValueType::String;
        }
        let saw_number = self.saw_int || self.saw_float;
        match (self.saw_bool, saw_number) {
            (true, true) => ValueType::String,
            (true, false) => ValueType::Bool,
            (false, true) if self.saw_float => ValueType::Float64,
            (false, true) => ValueType::Int64,
            (false, false) => ValueType::String,
        }
    }
}
/// Converts an optional JSON value into a [`Value`] of the requested type.
///
/// * A missing value (`None`) or JSON `null` yields `Value::Null`.
/// * For `Bool`, the integer types and the float types, the result is
///   `Value::Null` when the matching `serde_json` accessor (`as_bool`,
///   `as_i64`, `as_f64`) returns `None`, or when an integer does not fit the
///   narrower `Int16` / `Int32` target.
/// * Every other target type, `String` included, receives the value's text
///   form via `json_as_string`.
pub fn json_to_value(j: Option<&Json>, ty: ValueType) -> Value {
    let v = match j {
        Some(present) if !present.is_null() => present,
        _ => return Value::Null,
    };

    match ty {
        ValueType::Bool => v.as_bool().map_or(Value::Null, Value::Bool),
        ValueType::Int16 => v
            .as_i64()
            .and_then(|wide| i16::try_from(wide).ok())
            .map_or(Value::Null, Value::Int16),
        ValueType::Int32 => v
            .as_i64()
            .and_then(|wide| i32::try_from(wide).ok())
            .map_or(Value::Null, Value::Int32),
        ValueType::Int64 => v.as_i64().map_or(Value::Null, Value::Int64),
        // Narrowing cast: the f64 is deliberately reduced to f32 precision.
        ValueType::Float32 => v
            .as_f64()
            .map_or(Value::Null, |wide| Value::Float32(wide as f32)),
        ValueType::Float64 => v.as_f64().map_or(Value::Null, Value::Float64),
        // `String` and all remaining types share the textual fallback.
        _ => Value::String(json_as_string(v)),
    }
}
/// Converts a [`Value`] into its JSON representation.
///
/// * Booleans, integers and strings map to the corresponding JSON kinds.
/// * Floats and `DateTime` become JSON numbers, or `null` when
///   `Number::from_f64` rejects the value.
/// * `Binary` and `Guid` are rendered as lowercase hex strings.
/// * `Xml` is emitted as a plain JSON string.
/// * `Value::Null` and any variant not listed above become JSON `null`.
pub fn value_to_json(v: &Value) -> Json {
    use serde_json::Number;

    // Shared float path: `from_f64` returns `None` for values JSON cannot hold.
    let float_or_null = |x: f64| Number::from_f64(x).map_or(Json::Null, Json::Number);

    match v {
        Value::Null => Json::Null,
        Value::Bool(flag) => Json::Bool(*flag),
        Value::Int16(n) => Json::from(*n),
        Value::Int32(n) => Json::from(*n),
        Value::Int64(n) => Json::from(*n),
        Value::Float32(x) => float_or_null(f64::from(*x)),
        Value::Float64(x) => float_or_null(*x),
        Value::DateTime(stamp) => float_or_null(*stamp),
        Value::String(text) | Value::Xml(text) => Json::String(text.clone()),
        Value::Binary(raw) => Json::String(hex_lower(raw)),
        Value::Guid(raw) => Json::String(hex_lower(raw)),
        _ => Json::Null,
    }
}
/// Renders a JSON value as text.
///
/// A JSON string yields its contents without surrounding quotes; any other
/// value yields its `to_string()` form.
fn json_as_string(v: &Json) -> String {
    if let Json::String(text) = v {
        text.clone()
    } else {
        v.to_string()
    }
}
/// Encodes `bytes` as a lowercase hexadecimal string, two digits per byte,
/// high nibble first.
fn hex_lower(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(bytes.len() * 2);
    encoded.extend(
        bytes
            .iter()
            .flat_map(|&byte| [byte >> 4, byte & 0x0f])
            .map(|nibble| char::from(DIGITS[usize::from(nibble)])),
    );
    encoded
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Extracts the property map out of a `json!` object literal.
    fn props(j: Json) -> JsonMap<String, Json> {
        j.as_object().cloned().unwrap()
    }

    /// Runs `infer_fields` over rows given as `json!` object literals.
    fn infer(rows: Vec<Json>) -> Vec<FieldDef> {
        let maps: Vec<JsonMap<String, Json>> = rows.into_iter().map(props).collect();
        let borrowed: Vec<Option<&JsonMap<String, Json>>> = maps.iter().map(Some).collect();
        infer_fields(&borrowed)
    }

    #[test]
    fn infers_int_for_all_int_column() {
        let fields = infer(vec![json!({"id": 1}), json!({"id": 2})]);
        assert_eq!(fields.len(), 1);
        let id = &fields[0];
        assert_eq!(id.name, "id");
        assert_eq!(id.ty, ValueType::Int64);
        assert!(!id.nullable);
    }

    #[test]
    fn widens_int_plus_float_to_float64() {
        let fields = infer(vec![json!({"score": 1}), json!({"score": 1.5})]);
        assert_eq!(fields[0].ty, ValueType::Float64);
    }

    #[test]
    fn missing_key_makes_field_nullable() {
        // The second row has no "name" entry at all.
        let fields = infer(vec![json!({"name": "alice"}), json!({})]);
        assert_eq!(fields[0].name, "name");
        assert!(fields[0].nullable);
    }

    #[test]
    fn explicit_null_makes_nullable() {
        // A null must not disturb the inferred type, only the nullability.
        let fields = infer(vec![json!({"x": 1}), json!({"x": null})]);
        assert_eq!(fields[0].ty, ValueType::Int64);
        assert!(fields[0].nullable);
    }

    #[test]
    fn composite_value_becomes_string() {
        let fields = infer(vec![json!({"tags": ["a", "b"]})]);
        assert_eq!(fields[0].ty, ValueType::String);
    }

    #[test]
    fn first_seen_key_order_preserved() {
        let fields = infer(vec![
            json!({"z": 1, "a": 2, "m": 3}),
            json!({"b": 4, "a": 5}),
        ]);
        let names: Vec<&str> = fields.iter().map(|f| f.name.as_str()).collect();
        assert_eq!(names.len(), 4);
        assert!(names.contains(&"b"));
        let index_of = |wanted: &str| names.iter().position(|&n| n == wanted).unwrap();
        assert!(
            index_of("a") < index_of("b"),
            "first-seen 'a' before 'b': {names:?}"
        );
    }

    #[test]
    fn json_to_value_round_trips_primitives() {
        let truthy = json!(true);
        assert_eq!(
            json_to_value(Some(&truthy), ValueType::Bool),
            Value::Bool(true)
        );

        let integer = json!(42);
        assert_eq!(
            json_to_value(Some(&integer), ValueType::Int64),
            Value::Int64(42)
        );

        let fraction = json!(1.5);
        assert_eq!(
            json_to_value(Some(&fraction), ValueType::Float64),
            Value::Float64(1.5)
        );

        let text = json!("hi");
        assert_eq!(
            json_to_value(Some(&text), ValueType::String),
            Value::String("hi".to_string())
        );

        // Both an absent value and an explicit null collapse to `Value::Null`.
        assert_eq!(json_to_value(None, ValueType::Int64), Value::Null);
        assert_eq!(
            json_to_value(Some(&Json::Null), ValueType::Int64),
            Value::Null
        );
    }

    #[test]
    fn json_to_value_int_overflow_is_null() {
        let too_big_for_i32 = json!(i64::MAX);
        assert_eq!(
            json_to_value(Some(&too_big_for_i32), ValueType::Int32),
            Value::Null
        );
    }

    #[test]
    fn value_to_json_round_trip() {
        assert_eq!(value_to_json(&Value::Null), Json::Null);
        assert_eq!(value_to_json(&Value::Bool(true)), Json::Bool(true));
        assert_eq!(value_to_json(&Value::Int64(7)), json!(7));
        assert_eq!(value_to_json(&Value::Float64(1.5)), json!(1.5));
        assert_eq!(
            value_to_json(&Value::String("x".into())),
            Json::String("x".into())
        );
    }
}