use std::path::{Path, PathBuf};
use geonative_core::{Crs, Feature, GeomField, Geometry, GeometryType, Schema, Value};
use serde_json::{Map as JsonMap, Value as Json};
use crate::error::{GeoJsonError, Result};
use crate::geometry::from_json as geom_from_json;
use crate::properties::{json_to_value, FieldsAccumulator};
use crate::scanner;
/// Reader over a GeoJSON document.
///
/// A reader is either file-backed (created by `open`, features are read from
/// disk on demand) or fully in memory (created by `from_bytes` /
/// `from_value`); the distinction is carried by the private `ReaderImpl`.
#[derive(Debug)]
pub struct GeoJsonReader {
/// Backing strategy and the state that goes with it.
inner: ReaderImpl,
}
/// Backing storage for a `GeoJsonReader`.
#[derive(Debug)]
enum ReaderImpl {
/// File-backed reader: keeps only the path, the schema inferred from the
/// file, and the number of features counted during that inference pass.
Streaming {
path: PathBuf,
schema: Schema,
feature_count: usize,
},
/// In-memory reader: the schema together with every parsed feature.
Eager {
schema: Schema,
features: Vec<Feature>,
},
}
impl GeoJsonReader {
    /// Opens the GeoJSON file at `path` as a file-backed (streaming) reader.
    ///
    /// The file is scanned once by `streaming_infer_schema` to obtain the
    /// schema and the feature count. Features are not retained; they are read
    /// from the file again when [`GeoJsonReader::into_features`] is called.
    ///
    /// # Errors
    ///
    /// Propagates any error raised by the schema-inference pass.
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref().to_path_buf();
        let (schema, feature_count) = streaming_infer_schema(&path)?;
        Ok(Self {
            inner: ReaderImpl::Streaming {
                path,
                schema,
                feature_count,
            },
        })
    }

    /// Parses `bytes` as JSON and builds an in-memory reader from it.
    ///
    /// # Errors
    ///
    /// Fails when the bytes are not valid JSON or when
    /// [`GeoJsonReader::from_value`] rejects the parsed document.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        let root: Json = serde_json::from_slice(bytes)?;
        Self::from_value(root)
    }

    /// Builds an in-memory reader from an already parsed JSON document.
    ///
    /// # Errors
    ///
    /// Propagates any error from `build_eager_from_root`.
    pub fn from_value(root: Json) -> Result<Self> {
        let (schema, features) = build_eager_from_root(root)?;
        Ok(Self {
            inner: ReaderImpl::Eager { schema, features },
        })
    }

    /// Schema of the document, available in both reader modes.
    pub fn schema(&self) -> &Schema {
        match &self.inner {
            ReaderImpl::Streaming { schema, .. } | ReaderImpl::Eager { schema, .. } => schema,
        }
    }

    /// Number of features: the count recorded at open time for a file-backed
    /// reader, or the length of the in-memory vector otherwise.
    pub fn feature_count(&self) -> usize {
        match &self.inner {
            ReaderImpl::Streaming { feature_count, .. } => *feature_count,
            ReaderImpl::Eager { features, .. } => features.len(),
        }
    }

    /// Borrowed view of the in-memory features.
    ///
    /// A file-backed reader holds no features in memory, so this returns an
    /// empty slice for it; use [`GeoJsonReader::into_features`] instead.
    pub fn features(&self) -> &[Feature] {
        match &self.inner {
            ReaderImpl::Eager { features, .. } => features,
            ReaderImpl::Streaming { .. } => &[],
        }
    }

    /// Consumes the reader and returns an iterator over its features.
    ///
    /// A file-backed reader hands its path and schema to the streaming
    /// iterator; an in-memory reader simply drains its vector.
    pub fn into_features(self) -> FeatureIter {
        let inner = match self.inner {
            ReaderImpl::Streaming { path, schema, .. } => IterInner::open_streaming(path, schema),
            ReaderImpl::Eager { features, .. } => IterInner::Eager(features.into_iter()),
        };
        FeatureIter { inner }
    }

    /// Iterates over clones of the in-memory features, each wrapped in `Ok`.
    ///
    /// Like [`GeoJsonReader::features`], this yields nothing for a
    /// file-backed reader. Features are cloned one at a time as the iterator
    /// advances rather than copied into a temporary vector up front.
    pub fn iter_results(&self) -> impl Iterator<Item = Result<Feature>> + '_ {
        self.features().iter().cloned().map(Ok)
    }
}
/// Iterator over the features of a `GeoJsonReader`, yielding
/// `Result<Feature>` items. Returned by `GeoJsonReader::into_features`.
pub struct FeatureIter {
/// Current iteration state (streaming, eager, failed or done).
inner: IterInner,
}
impl std::fmt::Debug for FeatureIter {
    /// Writes only the name of the iterator's current state; the payload of
    /// each state is deliberately left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let state_name = match &self.inner {
            IterInner::Done => "FeatureIter::Done",
            IterInner::Failed(_) => "FeatureIter::Failed",
            IterInner::Eager(_) => "FeatureIter::Eager",
            IterInner::Streaming { .. } => "FeatureIter::Streaming",
        };
        f.write_str(state_name)
    }
}
/// State machine behind `FeatureIter`.
enum IterInner {
/// Reading features one at a time from an open file, building each one
/// against the boxed schema.
Streaming {
reader: std::io::BufReader<std::fs::File>,
schema: Box<Schema>,
},
/// Draining features that are already in memory.
Eager(std::vec::IntoIter<Feature>),
/// Holds an error that is reported once by the iterator; the `Option` lets
/// the error be moved out.
Failed(Option<GeoJsonError>),
/// Exhausted: the iterator returns `None` from here on.
Done,
}
impl IterInner {
    /// Opens `path` and positions a streaming iterator at its first feature.
    ///
    /// Failures while opening the file or reading the top-level header are
    /// not returned here; they are stored in `Failed` so the iterator reports
    /// them as its first item.
    ///
    /// When the document root is a bare Feature or Geometry there is no
    /// feature array to stream. The root value is then built into a single
    /// feature against `schema` and yielded from memory, so that iteration
    /// agrees with the feature count reported by `streaming_infer_schema`
    /// for such files (previously nothing was yielded).
    fn open_streaming(path: PathBuf, schema: Schema) -> Self {
        let buf_reader = match scanner::buf_reader_for_file(&path) {
            Ok(b) => b,
            Err(e) => return IterInner::Failed(Some(e)),
        };
        match scanner::open_top_level(buf_reader) {
            Err(e) => IterInner::Failed(Some(e)),
            Ok(scanner::TopLevel::Collection { reader, .. }) => IterInner::Streaming {
                reader,
                schema: Box::new(schema),
            },
            Ok(scanner::TopLevel::BareFeature(v) | scanner::TopLevel::BareGeometry(v)) => {
                match build_feature_from_value(&v, &schema) {
                    Ok(feature) => IterInner::Eager(vec![feature].into_iter()),
                    Err(e) => IterInner::Failed(Some(e)),
                }
            }
        }
    }
}

impl Iterator for FeatureIter {
    type Item = Result<Feature>;

    /// Advances the iterator.
    ///
    /// * `Done` yields `None`.
    /// * `Failed` yields its stored error once and then becomes `Done`.
    /// * `Eager` yields the next in-memory feature wrapped in `Ok`.
    /// * `Streaming` reads the next feature value from the file. A scanner
    ///   error or the end of the array moves the iterator to `Done`; an error
    ///   while building one feature is yielded without ending iteration.
    fn next(&mut self) -> Option<Self::Item> {
        // Arms that keep the current state return directly; the remaining
        // arms compute the final item and fall through to the `Done` switch.
        let last = match &mut self.inner {
            IterInner::Done => return None,
            IterInner::Eager(iter) => return iter.next().map(Ok),
            // An already-emptied slot returns `None` and leaves the state as is.
            IterInner::Failed(slot) => Some(Err(slot.take()?)),
            IterInner::Streaming { reader, schema } => match scanner::next_feature_value(reader) {
                Ok(Some(v)) => return Some(build_feature_from_value(&v, schema)),
                Ok(None) => None,
                Err(e) => Some(Err(e)),
            },
        };
        self.inner = IterInner::Done;
        last
    }
}
fn streaming_infer_schema(path: &Path) -> Result<(Schema, usize)> {
let buf = scanner::buf_reader_for_file(path)?;
let top = scanner::open_top_level(buf)?;
match top {
scanner::TopLevel::Collection {
mut reader,
header_keys,
} => {
let crs = crs_from_header(&header_keys);
let mut fields = FieldsAccumulator::new();
let mut geom_kind: Option<GeometryType> = None;
let mut count: usize = 0;
loop {
let val = scanner::next_feature_value(&mut reader)?;
let Some(val) = val else { break };
let raw = parse_feature_or_geometry(&val)?;
fields.observe(raw.properties.as_ref());
if let Some(g) = &raw.geometry {
let k = geom_type_of(g);
match geom_kind {
None => geom_kind = Some(k),
Some(existing) if existing == k => {}
Some(_) => geom_kind = Some(GeometryType::GeometryCollection),
}
}
count += 1;
}
let fields = fields.finalize();
let geom_field = geom_kind.map(|k| GeomField::new("geometry", k));
Ok((Schema::new(fields, geom_field, crs), count))
}
scanner::TopLevel::BareFeature(v) | scanner::TopLevel::BareGeometry(v) => {
let (schema, features) = build_eager_from_root(v)?;
let count = features.len();
let _ = features; Ok((schema, count))
}
}
}
/// Parses a whole GeoJSON document held in memory into a schema and its
/// features.
///
/// The root may be a `FeatureCollection`, a single `Feature`, or a bare
/// geometry object (which becomes one feature without properties). Fields are
/// inferred from the properties of every feature, and each feature's
/// attributes are then laid out in that field order. A feature without a
/// usable `id` receives its position in the document as its fid.
///
/// # Errors
///
/// Returns a malformed-document error when the root is not an object, has no
/// string `type`, or is a collection without a `features` array; an
/// unsupported error for any other root `type`; and any error raised while
/// parsing an individual feature or geometry.
fn build_eager_from_root(root: Json) -> Result<(Schema, Vec<Feature>)> {
    let obj = root
        .as_object()
        .ok_or_else(|| GeoJsonError::malformed("GeoJSON root must be a JSON object"))?;
    let ty = obj
        .get("type")
        .and_then(Json::as_str)
        .ok_or_else(|| GeoJsonError::malformed("GeoJSON root missing 'type'"))?;
    let crs = crs_from_header(obj);

    let raw_features: Vec<RawFeature> = match ty {
        "FeatureCollection" => {
            let arr = obj
                .get("features")
                .and_then(Json::as_array)
                .ok_or_else(|| GeoJsonError::malformed("FeatureCollection missing 'features'"))?;
            arr.iter()
                .map(parse_feature_or_geometry)
                .collect::<Result<Vec<_>>>()?
        }
        "Feature" => vec![parse_feature(obj)?],
        "Point" | "LineString" | "Polygon" | "MultiPoint" | "MultiLineString" | "MultiPolygon"
        | "GeometryCollection" => vec![RawFeature {
            fid: None,
            geometry: Some(geom_from_json(&root)?),
            properties: None,
        }],
        other => {
            return Err(GeoJsonError::unsupported(format!(
                "top-level type '{other}'"
            )))
        }
    };

    // First pass over the raw features: infer the attribute fields.
    let mut acc = FieldsAccumulator::new();
    for f in &raw_features {
        acc.observe(f.properties.as_ref());
    }
    let fields = acc.finalize();
    let geom_field = detect_geom_kind(&raw_features).map(|k| GeomField::new("geometry", k));

    // Second pass: build the features while `fields` is only borrowed, so the
    // list can be moved into the schema afterwards without cloning it.
    let features: Vec<Feature> = raw_features
        .into_iter()
        .enumerate()
        .map(|(i, raw)| {
            let attrs: Vec<Value> = fields
                .iter()
                .map(|f| match raw.properties.as_ref() {
                    Some(props) => json_to_value(props.get(&f.name), f.ty),
                    None => Value::Null,
                })
                .collect();
            // Fall back to the feature's index when it carries no usable id.
            Feature::new(raw.fid.or(Some(i as i64)), raw.geometry, attrs)
        })
        .collect();

    Ok((Schema::new(fields, geom_field, crs), features))
}
fn build_feature_from_value(v: &Json, schema: &Schema) -> Result<Feature> {
let raw = parse_feature_or_geometry(v)?;
let attrs: Vec<Value> = schema
.fields
.iter()
.map(|f| match raw.properties.as_ref() {
Some(props) => json_to_value(props.get(&f.name), f.ty),
None => Value::Null,
})
.collect();
Ok(Feature::new(raw.fid, raw.geometry, attrs))
}
/// Intermediate form of a feature: parsed out of JSON but not yet projected
/// onto a schema.
#[derive(Debug)]
struct RawFeature {
/// Feature id, when the JSON `id` could be read as an `i64`.
fid: Option<i64>,
/// Parsed geometry; `None` when the feature has a null or missing geometry.
geometry: Option<Geometry>,
/// Owned copy of the feature's `properties` object; `None` when it is null
/// or missing, or when the feature came from a bare geometry.
properties: Option<JsonMap<String, Json>>,
}
/// Parses one element of a feature array, accepting either a `Feature`
/// object or a bare geometry object.
///
/// A bare geometry becomes a feature with no id and no properties.
///
/// # Errors
///
/// Malformed when `v` is not an object or lacks a string `type`; unsupported
/// for any `type` other than `Feature` or one of the seven geometry types;
/// plus any error from parsing the feature or geometry itself.
fn parse_feature_or_geometry(v: &Json) -> Result<RawFeature> {
    const GEOMETRY_TYPES: [&str; 7] = [
        "Point",
        "LineString",
        "Polygon",
        "MultiPoint",
        "MultiLineString",
        "MultiPolygon",
        "GeometryCollection",
    ];

    let Some(obj) = v.as_object() else {
        return Err(GeoJsonError::malformed("feature must be a JSON object"));
    };
    let Some(ty) = obj.get("type").and_then(Json::as_str) else {
        return Err(GeoJsonError::malformed("feature missing 'type'"));
    };

    if ty == "Feature" {
        return parse_feature(obj);
    }
    if GEOMETRY_TYPES.contains(&ty) {
        let geometry = geom_from_json(v)?;
        return Ok(RawFeature {
            fid: None,
            geometry: Some(geometry),
            properties: None,
        });
    }
    Err(GeoJsonError::unsupported(format!(
        "feature-array element type '{ty}'"
    )))
}
/// Extracts geometry, properties and id from a GeoJSON `Feature` object.
///
/// A missing or null `geometry` / `properties` member becomes `None`. The
/// `id` is kept only when `json_id_to_i64` can convert it.
///
/// # Errors
///
/// Propagates geometry parse errors, and reports a malformed feature when
/// `properties` is present but is neither an object nor null. Geometry is
/// checked before properties.
fn parse_feature(obj: &JsonMap<String, Json>) -> Result<RawFeature> {
    let geometry = match obj.get("geometry") {
        Some(g) if !g.is_null() => Some(geom_from_json(g)?),
        _ => None,
    };

    let properties = match obj.get("properties") {
        Some(Json::Object(map)) => Some(map.clone()),
        None | Some(Json::Null) => None,
        Some(_) => {
            return Err(GeoJsonError::malformed(
                "feature 'properties' must be object or null",
            ));
        }
    };

    Ok(RawFeature {
        fid: obj.get("id").and_then(json_id_to_i64),
        geometry,
        properties,
    })
}
/// Converts a JSON feature `id` to an `i64`.
///
/// Accepts a JSON value that `as_i64` can read, or a string that parses as
/// an `i64`; everything else yields `None`.
fn json_id_to_i64(j: &Json) -> Option<i64> {
    j.as_i64()
        .or_else(|| j.as_str().and_then(|text| text.parse::<i64>().ok()))
}
fn detect_geom_kind(features: &[RawFeature]) -> Option<GeometryType> {
let mut found: Option<GeometryType> = None;
for f in features {
if let Some(g) = &f.geometry {
let k = geom_type_of(g);
match found {
None => found = Some(k),
Some(existing) if existing == k => {}
Some(_) => return Some(GeometryType::GeometryCollection),
}
}
}
found
}
/// Maps a geometry value to its `GeometryType` tag.
///
/// Any variant not listed explicitly is reported as `GeometryCollection`.
fn geom_type_of(g: &Geometry) -> GeometryType {
    match g {
        // Single-part geometries.
        Geometry::Point(..) => GeometryType::Point,
        Geometry::LineString(..) => GeometryType::LineString,
        Geometry::Polygon(..) => GeometryType::Polygon,
        // Multi-part geometries.
        Geometry::MultiPoint(..) => GeometryType::MultiPoint,
        Geometry::MultiLineString(..) => GeometryType::MultiLineString,
        Geometry::MultiPolygon(..) => GeometryType::MultiPolygon,
        // Collections, plus the fallback for any other variant.
        Geometry::GeometryCollection(..) => GeometryType::GeometryCollection,
        _ => GeometryType::GeometryCollection,
    }
}
/// Derives the CRS from a document's top-level members.
///
/// Looks at `crs.properties.name`. When that string is present it is
/// resolved, in order, as an EPSG code (via `parse_epsg_urn`), as EPSG:4326
/// when it mentions `CRS84`, or otherwise kept verbatim in `Crs::Wkt`. When
/// there is no such name the result is EPSG:4326.
fn crs_from_header(obj: &JsonMap<String, Json>) -> Crs {
    let declared_name = obj
        .get("crs")
        .and_then(|crs| crs.get("properties"))
        .and_then(|props| props.get("name"))
        .and_then(Json::as_str);

    match declared_name {
        None => Crs::Epsg(4326),
        Some(name) => match parse_epsg_urn(name) {
            Some(code) => Crs::Epsg(code),
            None if name.contains("CRS84") => Crs::Epsg(4326),
            None => Crs::Wkt(name.to_string()),
        },
    }
}
/// Extracts the numeric EPSG code from a CRS name such as
/// `urn:ogc:def:crs:EPSG::3857`, `EPSG:4326` or
/// `http://www.opengis.net/def/crs/EPSG/0/4326`.
///
/// The match on `epsg` is ASCII case-insensitive and uses its last
/// occurrence. The code is the final contiguous run of ASCII digits after
/// that marker, so a version segment (`urn:ogc:def:crs:EPSG:6.6:4326`) is
/// skipped instead of being concatenated with the code.
///
/// Returns `None` when the name contains no `epsg`, when no digits follow
/// it, or when the digits do not fit in a `u32`.
fn parse_epsg_urn(s: &str) -> Option<u32> {
    // ASCII lowercasing keeps every byte offset, so `idx` is valid for `s`.
    let idx = s.to_ascii_lowercase().rfind("epsg")?;
    let tail = &s[idx + 4..];
    // Walk digit runs from the end; separators produce empty pieces to skip.
    tail.rsplit(|c: char| !c.is_ascii_digit())
        .find(|run| !run.is_empty())?
        .parse()
        .ok()
}
// Unit tests for the reader. All but the last build an in-memory reader with
// `from_bytes`; the last one also opens a temporary file in streaming mode.
#[cfg(test)]
mod tests {
use super::*;
use geonative_core::ValueType;
// A two-feature collection reports two features, two fields, and keeps the
// numeric id of the first feature.
#[test]
fn reads_feature_collection() {
let json = br#"
{
"type": "FeatureCollection",
"features": [
{ "type": "Feature", "id": 1, "geometry": {"type":"Point","coordinates":[1,2]}, "properties": {"name": "a", "rank": 10}},
{ "type": "Feature", "id": 2, "geometry": {"type":"Point","coordinates":[3,4]}, "properties": {"name": "b", "rank": 20}}
]
}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.feature_count(), 2);
assert_eq!(r.schema().fields.len(), 2);
assert_eq!(r.features()[0].fid, Some(1));
}
// A root-level Feature is read as a single feature.
#[test]
fn reads_bare_feature() {
let json = br#"{"type":"Feature","geometry":{"type":"Point","coordinates":[1,2]},"properties":{}}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.feature_count(), 1);
}
// A root-level geometry is wrapped into a single feature with a geometry.
#[test]
fn reads_bare_geometry() {
let json = br#"{"type":"Point","coordinates":[10,20]}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.feature_count(), 1);
assert!(r.features()[0].geometry.is_some());
}
// Differing geometry types in one collection widen the schema's geometry
// kind to GeometryCollection.
#[test]
fn mixed_geometry_kinds_become_collection() {
let json = br#"{
"type":"FeatureCollection",
"features":[
{"type":"Feature","geometry":{"type":"Point","coordinates":[1,2]},"properties":{}},
{"type":"Feature","geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]},"properties":{}}
]
}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(
r.schema().geometry.as_ref().unwrap().kind,
GeometryType::GeometryCollection
);
}
// A top-level `crs` member naming an EPSG URN sets the schema CRS.
#[test]
fn honours_legacy_epsg_urn() {
let json = br#"{
"type":"FeatureCollection",
"crs":{"type":"name","properties":{"name":"urn:ogc:def:crs:EPSG::3857"}},
"features":[]
}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.schema().crs, Crs::Epsg(3857));
}
// Without a `crs` member the schema CRS is EPSG:4326.
#[test]
fn defaults_to_epsg_4326() {
let json = br#"{"type":"FeatureCollection","features":[]}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.schema().crs, Crs::Epsg(4326));
}
// A feature with `"geometry": null` is accepted and has no geometry.
#[test]
fn null_geometry_allowed() {
let json = br#"{
"type":"FeatureCollection",
"features":[
{"type":"Feature","geometry":null,"properties":{"x":1}}
]
}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.feature_count(), 1);
assert!(r.features()[0].geometry.is_none());
}
// A JSON array or number at the root is rejected.
#[test]
fn rejects_non_object_root() {
assert!(GeoJsonReader::from_bytes(b"[]").is_err());
assert!(GeoJsonReader::from_bytes(b"42").is_err());
}
// A root object without a `type` member is rejected.
#[test]
fn rejects_missing_type() {
assert!(GeoJsonReader::from_bytes(b"{}").is_err());
}
// A string `id` holding an integer is converted to a numeric fid.
#[test]
fn string_id_parses_to_fid() {
let json = br#"{"type":"Feature","id":"42","geometry":null,"properties":{}}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.features()[0].fid, Some(42));
}
// A property seen as both an integer and a float is typed Float64, and the
// integer value is stored as a float.
#[test]
fn schema_widens_int_to_float64() {
let json = br#"{
"type":"FeatureCollection",
"features":[
{"type":"Feature","geometry":null,"properties":{"v":1}},
{"type":"Feature","geometry":null,"properties":{"v":2.5}}
]
}"#;
let r = GeoJsonReader::from_bytes(json).unwrap();
assert_eq!(r.schema().fields[0].ty, ValueType::Float64);
match &r.features()[0].attributes[0] {
Value::Float64(n) => assert_eq!(*n, 1.0),
other => panic!("expected Float64, got {other:?}"),
}
}
// Opening the same document from a file in streaming mode gives the same
// count, field count, CRS and first-feature attributes as the eager reader.
#[test]
fn streaming_open_on_file_matches_eager_results() {
let json = br#"{"type":"FeatureCollection","features":[
{"type":"Feature","id":7,"geometry":{"type":"Point","coordinates":[1,2]},"properties":{"name":"a","rank":10}},
{"type":"Feature","id":8,"geometry":{"type":"Point","coordinates":[3,4]},"properties":{"name":"b","rank":20}}
]}"#;
let tmp = tempfile::NamedTempFile::new().unwrap();
std::fs::write(tmp.path(), json).unwrap();
let eager = GeoJsonReader::from_bytes(json).unwrap();
let streaming = GeoJsonReader::open(tmp.path()).unwrap();
assert_eq!(streaming.feature_count(), eager.feature_count());
assert_eq!(streaming.schema().fields.len(), eager.schema().fields.len());
assert_eq!(streaming.schema().crs, eager.schema().crs);
let streamed: Vec<Feature> = streaming
.into_features()
.collect::<Result<Vec<_>>>()
.unwrap();
assert_eq!(streamed.len(), eager.features().len());
assert_eq!(streamed[0].attributes, eager.features()[0].attributes);
}
}