use serde_json::Value as JsonValue;
use crate::data::DataValue;
use crate::error::{LaurusError, Result};
use crate::lexical::core::field::{
BooleanOption, FloatOption, GeoOption, IntegerOption, TextOption,
};
use super::schema::FieldOption;
/// Outcome of inferring a field from a single JSON value.
///
/// Produced by `infer_from_json` and the array/object helpers it delegates to.
#[derive(Debug, Clone)]
pub enum InferredValue {
/// The JSON value was converted to a `DataValue`, paired with the
/// `FieldOption` inferred for it.
Inferred {
/// The converted value.
value: DataValue,
/// The field option inferred for `value`.
option: FieldOption,
},
/// Nothing could be inferred. Returned for JSON `null` and for empty arrays.
// NOTE(review): presumably callers omit the field in this case — confirm at call sites.
Skip,
}
/// Derives the default [`FieldOption`] matching an already-typed [`DataValue`].
///
/// # Returns
///
/// * `Ok(None)` for [`DataValue::Null`]: a null carries no type to infer from.
/// * `Ok(Some(option))` for text, integer, float, boolean, date-time, geo and
///   ECEF values. The numeric array variants map to the corresponding scalar
///   option with `multi_valued` set to `true`.
///
/// # Errors
///
/// Returns an invalid-argument error for [`DataValue::Vector`] and
/// [`DataValue::Bytes`]: these are never inferred and must be declared
/// explicitly in the schema.
pub fn infer_option_from_data_value(value: &DataValue) -> Result<Option<FieldOption>> {
    let option = match value {
        // Variants that never produce an option leave the function right away.
        DataValue::Null => return Ok(None),
        DataValue::Vector(_) => {
            return Err(LaurusError::invalid_argument(
                "vector values require an explicit vector field declaration \
                 (Hnsw, Flat, or Ivf) in the schema",
            ));
        }
        DataValue::Bytes(_, _) => {
            return Err(LaurusError::invalid_argument(
                "bytes values require an explicit bytes field declaration in the schema",
            ));
        }

        // Scalars map to the default option of the matching kind.
        DataValue::Text(_) => FieldOption::Text(TextOption::default()),
        DataValue::Int64(_) => FieldOption::Integer(IntegerOption::default()),
        DataValue::Float64(_) => FieldOption::Float(FloatOption::default()),
        DataValue::Bool(_) => FieldOption::Boolean(BooleanOption::default()),
        DataValue::DateTime(_) => {
            FieldOption::DateTime(crate::lexical::core::field::DateTimeOption::default())
        }
        DataValue::Geo(_) => FieldOption::Geo(GeoOption::default()),
        DataValue::GeoEcef(_) => {
            FieldOption::Geo3d(crate::lexical::core::field::Geo3dOption::default())
        }

        // Numeric arrays reuse the scalar option, flagged as multi-valued.
        DataValue::Int64Array(_) => FieldOption::Integer(IntegerOption {
            multi_valued: true,
            ..IntegerOption::default()
        }),
        DataValue::Float64Array(_) => FieldOption::Float(FloatOption {
            multi_valued: true,
            ..FloatOption::default()
        }),
    };
    Ok(Some(option))
}
/// Converts a JSON value into a [`DataValue`] plus the [`FieldOption`] inferred for it.
///
/// * `null` yields [`InferredValue::Skip`].
/// * Booleans and strings map to `Bool` / `Text` with default options.
/// * Numbers become `Int64` when they fit in an `i64`, otherwise `Float64`.
/// * Arrays and objects are delegated to `infer_from_array` and
///   `infer_from_object` respectively.
///
/// # Errors
///
/// Returns an invalid-argument error when a number can be read neither as
/// `i64` nor as `f64`, and propagates any error from the array/object helpers.
pub fn infer_from_json(value: &JsonValue) -> Result<InferredValue> {
    let inferred = match value {
        JsonValue::Null => InferredValue::Skip,

        // Composite values have their own inference rules.
        JsonValue::Array(items) => return infer_from_array(items),
        JsonValue::Object(fields) => return infer_from_object(fields),

        JsonValue::Bool(flag) => InferredValue::Inferred {
            value: DataValue::Bool(*flag),
            option: FieldOption::Boolean(BooleanOption::default()),
        },
        JsonValue::String(text) => InferredValue::Inferred {
            value: DataValue::Text(text.clone()),
            option: FieldOption::Text(TextOption::default()),
        },

        // The integer reading wins whenever it is available.
        JsonValue::Number(num) => match (num.as_i64(), num.as_f64()) {
            (Some(int), _) => InferredValue::Inferred {
                value: DataValue::Int64(int),
                option: FieldOption::Integer(IntegerOption::default()),
            },
            (None, Some(float)) => InferredValue::Inferred {
                value: DataValue::Float64(float),
                option: FieldOption::Float(FloatOption::default()),
            },
            (None, None) => {
                return Err(LaurusError::invalid_argument(format!(
                    "number {num} cannot be represented as i64 or f64"
                )));
            }
        },
    };
    Ok(inferred)
}
/// Infers a multi-valued numeric field from a JSON array.
///
/// * An empty array yields [`InferredValue::Skip`]: there is no element to
///   infer a type from.
/// * If every element is a number readable as `i64`, the result is an
///   `Int64Array` with an integer option whose `multi_valued` is `true`.
/// * Otherwise, if every element is a number, the result is a `Float64Array`
///   with a float option whose `multi_valued` is `true`. This includes arrays
///   that mix integers and floats.
///
/// # Errors
///
/// Returns an invalid-argument error when
/// * any element is not a JSON number, or
/// * an element can be read neither as `i64` nor as `f64`. This mirrors the
///   scalar path in `infer_from_json`, which reports the same condition as an
///   error rather than panicking.
fn infer_from_array(arr: &[JsonValue]) -> Result<InferredValue> {
    if arr.is_empty() {
        return Ok(InferredValue::Skip);
    }

    // Reject non-numeric content first so it takes precedence over any
    // representability problem in an earlier element.
    let numbers = arr
        .iter()
        .map(|elem| match elem {
            JsonValue::Number(n) => Ok(n),
            _ => Err(LaurusError::invalid_argument(
                "array fields must contain only numeric values \
                 (mixed or non-numeric arrays are not supported)",
            )),
        })
        .collect::<Result<Vec<_>>>()?;

    // Collecting into `Option<Vec<_>>` gives `None` as soon as one element
    // does not fit in an `i64`, in which case the whole array is read as floats.
    let as_integers: Option<Vec<i64>> = numbers.iter().map(|n| n.as_i64()).collect();
    if let Some(values) = as_integers {
        return Ok(InferredValue::Inferred {
            value: DataValue::Int64Array(values),
            option: FieldOption::Integer(IntegerOption {
                multi_valued: true,
                ..Default::default()
            }),
        });
    }

    // `as_f64` is treated as fallible, exactly as `infer_from_json` does for
    // scalars, so an unrepresentable number is an error and not a panic.
    let values = numbers
        .iter()
        .map(|n| {
            n.as_f64().ok_or_else(|| {
                LaurusError::invalid_argument(format!(
                    "number {n} cannot be represented as i64 or f64"
                ))
            })
        })
        .collect::<Result<Vec<f64>>>()?;

    Ok(InferredValue::Inferred {
        value: DataValue::Float64Array(values),
        option: FieldOption::Float(FloatOption {
            multi_valued: true,
            ..Default::default()
        }),
    })
}
/// Infers a geographic point from a JSON object.
///
/// Two shapes are recognised:
///
/// * 2D: a latitude key (`lat` or `latitude`) together with a longitude key
///   (`lon`, `lng` or `longitude`). When several aliases are present the
///   first one in that order is used. Produces a `Geo` value.
/// * 3D ECEF: all of `x`, `y` and `z`. Produces a `GeoEcef` value.
///
/// # Errors
///
/// Returns an invalid-argument error when
/// * any 2D key appears together with any 3D key,
/// * a coordinate is not a JSON number,
/// * latitude is outside `[-90, 90]` or longitude is outside `[-180, 180]`,
/// * an ECEF coordinate is not finite, or
/// * the object matches neither shape completely (including partial key sets).
fn infer_from_object(map: &serde_json::Map<String, JsonValue>) -> Result<InferredValue> {
    // Reads one coordinate as `f64`, failing with `message` if it is not a number.
    fn coordinate(raw: &JsonValue, message: &'static str) -> Result<f64> {
        raw.as_f64()
            .ok_or_else(|| LaurusError::invalid_argument(message))
    }

    const LATITUDE_ALIASES: &[&str] = &["lat", "latitude"];
    const LONGITUDE_ALIASES: &[&str] = &["lon", "lng", "longitude"];

    let latitude = LATITUDE_ALIASES.iter().find_map(|alias| map.get(*alias));
    let longitude = LONGITUDE_ALIASES.iter().find_map(|alias| map.get(*alias));
    let ecef = [map.get("x"), map.get("y"), map.get("z")];

    // A single key from each family is enough to make the object ambiguous.
    let mentions_2d = latitude.is_some() || longitude.is_some();
    let mentions_3d = ecef.iter().any(Option::is_some);
    if mentions_2d && mentions_3d {
        return Err(LaurusError::invalid_argument(
            "object cannot mix 2D geographic keys (lat/lon) with 3D ECEF keys (x/y/z); \
             use either {lat, lon} or {x, y, z}",
        ));
    }

    // 2D point: both halves of the pair are required.
    if let Some((lat_json, lon_json)) = latitude.zip(longitude) {
        let lat = coordinate(lat_json, "geo latitude must be a number")?;
        let lon = coordinate(lon_json, "geo longitude must be a number")?;

        let lat_in_range = (-90.0..=90.0).contains(&lat);
        if !lat_in_range {
            return Err(LaurusError::invalid_argument(format!(
                "geo latitude {lat} is out of range [-90, 90]"
            )));
        }
        let lon_in_range = (-180.0..=180.0).contains(&lon);
        if !lon_in_range {
            return Err(LaurusError::invalid_argument(format!(
                "geo longitude {lon} is out of range [-180, 180]"
            )));
        }

        return Ok(InferredValue::Inferred {
            value: DataValue::Geo(crate::data::GeoPoint::new(lat, lon)),
            option: FieldOption::Geo(GeoOption::default()),
        });
    }

    // 3D ECEF point: all three axes are required.
    if let [Some(x_json), Some(y_json), Some(z_json)] = ecef {
        let x = coordinate(x_json, "Geo3d x must be a number")?;
        let y = coordinate(y_json, "Geo3d y must be a number")?;
        let z = coordinate(z_json, "Geo3d z must be a number")?;

        let all_finite = [x, y, z].iter().all(|axis| axis.is_finite());
        if !all_finite {
            return Err(LaurusError::invalid_argument(
                "Geo3d coordinates (x, y, z) must be finite numbers",
            ));
        }

        return Ok(InferredValue::Inferred {
            value: DataValue::GeoEcef(crate::data::GeoEcefPoint::new(x, y, z)),
            option: FieldOption::Geo3d(crate::lexical::core::field::Geo3dOption::default()),
        });
    }

    // Anything else, including a partial key set, is not a supported object.
    Err(LaurusError::invalid_argument(
        "object values are only supported as geographic points \
         (expected keys: lat|latitude, lon|lng|longitude for 2D, or x+y+z for 3D ECEF)",
    ))
}
// Unit tests for the inference helpers. Most tests go through `infer_from_json`
// and check the produced `DataValue`, the kind of `FieldOption`, or a
// distinctive substring of the error message.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Unwraps an `InferredValue::Inferred` into its parts; panics on `Skip` so a
// test that expected a value fails loudly.
fn inferred(v: InferredValue) -> (DataValue, FieldOption) {
match v {
InferredValue::Inferred { value, option } => (value, option),
InferredValue::Skip => panic!("expected Inferred, got Skip"),
}
}
// --- Scalars -------------------------------------------------------------
#[test]
fn infer_string_to_text() {
let (v, o) = inferred(infer_from_json(&json!("hello")).unwrap());
assert_eq!(v, DataValue::Text("hello".into()));
assert!(matches!(o, FieldOption::Text(_)));
}
#[test]
fn infer_integer_to_integer() {
let (v, o) = inferred(infer_from_json(&json!(42)).unwrap());
assert_eq!(v, DataValue::Int64(42));
assert!(matches!(o, FieldOption::Integer(_)));
}
#[test]
fn infer_negative_integer() {
let (v, o) = inferred(infer_from_json(&json!(-7)).unwrap());
assert_eq!(v, DataValue::Int64(-7));
assert!(matches!(o, FieldOption::Integer(_)));
}
#[test]
fn infer_float_to_float() {
let (v, o) = inferred(infer_from_json(&json!(4.5)).unwrap());
assert_eq!(v, DataValue::Float64(4.5));
assert!(matches!(o, FieldOption::Float(_)));
}
#[test]
fn infer_bool_to_boolean() {
let (v, o) = inferred(infer_from_json(&json!(true)).unwrap());
assert_eq!(v, DataValue::Bool(true));
assert!(matches!(o, FieldOption::Boolean(_)));
}
// --- Values that produce no field -----------------------------------------
#[test]
fn infer_null_skips() {
assert!(matches!(
infer_from_json(&JsonValue::Null).unwrap(),
InferredValue::Skip
));
}
#[test]
fn infer_empty_array_skips() {
assert!(matches!(
infer_from_json(&json!([])).unwrap(),
InferredValue::Skip
));
}
// --- Arrays --------------------------------------------------------------
#[test]
fn infer_integer_array_to_int64_array() {
let (v, o) = inferred(infer_from_json(&json!([1, 2, 3])).unwrap());
assert_eq!(v, DataValue::Int64Array(vec![1, 2, 3]));
match o {
FieldOption::Integer(opt) => assert!(opt.multi_valued),
other => panic!("expected Integer with multi_valued=true, got {other:?}"),
}
}
// A single non-integer element makes the whole array a float array; the
// integer `3` is widened to `3.0`.
#[test]
fn infer_float_array_to_float64_array() {
let (v, o) = inferred(infer_from_json(&json!([1.0, 2.5, 3])).unwrap());
assert_eq!(v, DataValue::Float64Array(vec![1.0, 2.5, 3.0]));
match o {
FieldOption::Float(opt) => assert!(opt.multi_valued),
other => panic!("expected Float with multi_valued=true, got {other:?}"),
}
}
#[test]
fn infer_mixed_array_rejected() {
let err = infer_from_json(&json!([1, "a"])).unwrap_err();
assert!(err.to_string().contains("only numeric"));
}
// --- 2D geo objects ------------------------------------------------------
#[test]
fn infer_geo_lat_lon() {
let (v, o) = inferred(infer_from_json(&json!({"lat": 35.1, "lon": 139.0})).unwrap());
assert_eq!(v, DataValue::Geo(crate::data::GeoPoint::new(35.1, 139.0)));
assert!(matches!(o, FieldOption::Geo(_)));
}
#[test]
fn infer_geo_latitude_longitude() {
let (v, _) =
inferred(infer_from_json(&json!({"latitude": 35.1, "longitude": 139.0})).unwrap());
assert_eq!(v, DataValue::Geo(crate::data::GeoPoint::new(35.1, 139.0)));
}
#[test]
fn infer_geo_lng_alias() {
let (v, _) = inferred(infer_from_json(&json!({"lat": 35.1, "lng": 139.0})).unwrap());
assert_eq!(v, DataValue::Geo(crate::data::GeoPoint::new(35.1, 139.0)));
}
#[test]
fn infer_geo_out_of_range_lat() {
let err = infer_from_json(&json!({"lat": 100.0, "lon": 139.0})).unwrap_err();
assert!(err.to_string().contains("latitude"));
}
#[test]
fn infer_geo_out_of_range_lon() {
let err = infer_from_json(&json!({"lat": 35.1, "lon": 200.0})).unwrap_err();
assert!(err.to_string().contains("longitude"));
}
// --- infer_option_from_data_value ------------------------------------------
#[test]
fn infer_option_from_data_value_geo_ecef_to_geo3d() {
let p = crate::data::GeoEcefPoint::new(1.0, 2.0, 3.0);
let inferred = infer_option_from_data_value(&DataValue::GeoEcef(p))
.unwrap()
.expect("GeoEcef must infer some FieldOption");
assert!(
matches!(inferred, FieldOption::Geo3d(_)),
"expected FieldOption::Geo3d, got {inferred:?}"
);
}
#[test]
fn infer_option_from_data_value_geo_to_geo() {
let p = crate::data::GeoPoint::new(35.1, 139.0);
let inferred = infer_option_from_data_value(&DataValue::Geo(p))
.unwrap()
.expect("Geo must infer some FieldOption");
assert!(
matches!(inferred, FieldOption::Geo(_)),
"expected FieldOption::Geo, got {inferred:?}"
);
}
// --- Unsupported or incomplete objects -------------------------------------
#[test]
fn infer_unknown_object_rejected() {
let err = infer_from_json(&json!({"foo": 1, "bar": 2})).unwrap_err();
assert!(err.to_string().contains("geographic"));
}
// A latitude without a longitude matches neither shape, so the generic
// "geographic points" error is expected.
#[test]
fn infer_geo_missing_lon_rejected() {
let err = infer_from_json(&json!({"lat": 35.1})).unwrap_err();
assert!(err.to_string().contains("geographic"));
}
// --- 3D ECEF objects -----------------------------------------------------
#[test]
fn infer_geo3d_xyz() {
let (v, o) = inferred(infer_from_json(&json!({"x": 1.0, "y": 2.0, "z": 3.0})).unwrap());
assert_eq!(
v,
DataValue::GeoEcef(crate::data::GeoEcefPoint::new(1.0, 2.0, 3.0))
);
assert!(matches!(o, FieldOption::Geo3d(_)));
}
// Integer coordinates are accepted and read as floats.
#[test]
fn infer_geo3d_integer_xyz() {
let (v, _) = inferred(infer_from_json(&json!({"x": 1, "y": 2, "z": 3})).unwrap());
assert_eq!(
v,
DataValue::GeoEcef(crate::data::GeoEcefPoint::new(1.0, 2.0, 3.0))
);
}
// Large-magnitude and negative coordinates pass through unchanged.
#[test]
fn infer_geo3d_real_ecef_values() {
let (v, _) = inferred(
infer_from_json(&json!({
"x": -3_955_182.0,
"y": 3_350_553.0,
"z": 3_700_276.0
}))
.unwrap(),
);
assert_eq!(
v,
DataValue::GeoEcef(crate::data::GeoEcefPoint::new(
-3_955_182.0,
3_350_553.0,
3_700_276.0
))
);
}
// Missing `z`: the object matches neither shape, so the generic error applies.
#[test]
fn infer_geo3d_partial_keys_rejected() {
let err = infer_from_json(&json!({"x": 1.0, "y": 2.0})).unwrap_err();
assert!(err.to_string().contains("geographic"));
}
#[test]
fn infer_geo3d_non_numeric_rejected() {
let err = infer_from_json(&json!({"x": "not a number", "y": 2.0, "z": 3.0})).unwrap_err();
assert!(err.to_string().contains("Geo3d"));
}
// --- Mixing 2D and 3D keys -------------------------------------------------
#[test]
fn infer_geo3d_2d_3d_mix_rejected() {
let err = infer_from_json(&json!({"lat": 35.0, "x": 1.0, "y": 2.0, "z": 3.0})).unwrap_err();
assert!(err.to_string().contains("mix"));
}
// The mix check fires even when neither key set is complete.
#[test]
fn infer_geo3d_partial_2d_3d_mix_rejected() {
let err = infer_from_json(&json!({"lat": 35.0, "x": 1.0})).unwrap_err();
assert!(err.to_string().contains("mix"));
}
}