1use std::path::{Path, PathBuf};
34
35use geonative_core::{Crs, Feature, GeomField, Geometry, GeometryType, Schema, Value};
36use serde_json::{Map as JsonMap, Value as Json};
37
38use crate::error::{GeoJsonError, Result};
39use crate::geometry::from_json as geom_from_json;
40use crate::properties::{json_to_value, FieldsAccumulator};
41use crate::scanner;
42
/// Reader over a GeoJSON document.
///
/// Wraps one of two internal modes (see `ReaderImpl`): a file-backed
/// streaming mode created by `GeoJsonReader::open`, or an in-memory eager
/// mode created by `GeoJsonReader::from_bytes` / `GeoJsonReader::from_value`.
#[derive(Debug)]
pub struct GeoJsonReader {
    // The active mode together with its inferred schema.
    inner: ReaderImpl,
}
47
/// The two ways a `GeoJsonReader` can hold its data.
#[derive(Debug)]
enum ReaderImpl {
    /// File-backed mode: only the path, the inferred schema and the feature
    /// count are kept; no features are stored in the reader itself.
    Streaming {
        // File the reader was opened on.
        path: PathBuf,
        // Schema inferred when the reader was opened.
        schema: Schema,
        // Number of features counted when the reader was opened.
        feature_count: usize,
    },
    /// In-memory mode: every feature has already been built.
    Eager {
        // Schema inferred from the parsed document.
        schema: Schema,
        // All features of the document, in document order.
        features: Vec<Feature>,
    },
}
67
68impl GeoJsonReader {
69 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
82 let path = path.as_ref().to_path_buf();
83 let (schema, feature_count) = streaming_infer_schema(&path)?;
84 Ok(Self {
85 inner: ReaderImpl::Streaming {
86 path,
87 schema,
88 feature_count,
89 },
90 })
91 }
92
93 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
97 let root: Json = serde_json::from_slice(bytes)?;
98 Self::from_value(root)
99 }
100
101 pub fn from_value(root: Json) -> Result<Self> {
104 let (schema, features) = build_eager_from_root(root)?;
105 Ok(Self {
106 inner: ReaderImpl::Eager { schema, features },
107 })
108 }
109
110 pub fn schema(&self) -> &Schema {
111 match &self.inner {
112 ReaderImpl::Streaming { schema, .. } => schema,
113 ReaderImpl::Eager { schema, .. } => schema,
114 }
115 }
116
117 pub fn feature_count(&self) -> usize {
118 match &self.inner {
119 ReaderImpl::Streaming { feature_count, .. } => *feature_count,
120 ReaderImpl::Eager { features, .. } => features.len(),
121 }
122 }
123
124 pub fn features(&self) -> &[Feature] {
128 match &self.inner {
129 ReaderImpl::Eager { features, .. } => features,
130 ReaderImpl::Streaming { .. } => &[],
131 }
132 }
133
134 pub fn into_features(self) -> FeatureIter {
140 match self.inner {
141 ReaderImpl::Streaming { path, schema, .. } => FeatureIter {
142 inner: IterInner::open_streaming(path, schema),
143 },
144 ReaderImpl::Eager { features, .. } => FeatureIter {
145 inner: IterInner::Eager(features.into_iter()),
146 },
147 }
148 }
149
150 pub fn iter_results(&self) -> impl Iterator<Item = Result<Feature>> + '_ {
155 let owned: Vec<Feature> = match &self.inner {
156 ReaderImpl::Eager { features, .. } => features.clone(),
157 ReaderImpl::Streaming { .. } => Vec::new(),
158 };
159 owned.into_iter().map(Ok)
160 }
161}
162
/// Iterator over the features of a GeoJSON document, yielding
/// `Result<Feature>` items.
///
/// Returned by `GeoJsonReader::into_features`.
pub struct FeatureIter {
    // Current iteration state (streaming, eager, failed or done).
    inner: IterInner,
}
171
172impl std::fmt::Debug for FeatureIter {
173 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174 match &self.inner {
175 IterInner::Streaming { .. } => f.write_str("FeatureIter::Streaming"),
176 IterInner::Eager(_) => f.write_str("FeatureIter::Eager"),
177 IterInner::Failed(_) => f.write_str("FeatureIter::Failed"),
178 IterInner::Done => f.write_str("FeatureIter::Done"),
179 }
180 }
181}
182
183enum IterInner {
184 Streaming {
188 reader: std::io::BufReader<std::fs::File>,
189 schema: Box<Schema>,
190 },
191 Eager(std::vec::IntoIter<Feature>),
194 Failed(Option<GeoJsonError>),
195 Done,
196}
197
198impl IterInner {
199 fn open_streaming(path: PathBuf, schema: Schema) -> Self {
200 let buf_reader = match scanner::buf_reader_for_file(&path) {
201 Err(e) => return IterInner::Failed(Some(e)),
202 Ok(b) => b,
203 };
204 match scanner::open_top_level(buf_reader) {
205 Err(e) => IterInner::Failed(Some(e)),
206 Ok(scanner::TopLevel::Collection { reader, .. }) => IterInner::Streaming {
207 reader,
208 schema: Box::new(schema),
209 },
210 Ok(scanner::TopLevel::BareFeature(_) | scanner::TopLevel::BareGeometry(_)) => {
214 IterInner::Done
215 }
216 }
217 }
218}
219
220impl Iterator for FeatureIter {
221 type Item = Result<Feature>;
222
223 fn next(&mut self) -> Option<Self::Item> {
224 match &mut self.inner {
227 IterInner::Done => None,
228 IterInner::Failed(slot) => {
229 let err = slot.take()?;
230 self.inner = IterInner::Done;
231 Some(Err(err))
232 }
233 IterInner::Eager(iter) => iter.next().map(Ok),
234 IterInner::Streaming { reader, schema } => match scanner::next_feature_value(reader) {
235 Err(e) => {
236 self.inner = IterInner::Done;
237 Some(Err(e))
238 }
239 Ok(None) => {
240 self.inner = IterInner::Done;
241 None
242 }
243 Ok(Some(v)) => Some(build_feature_from_value(&v, schema)),
244 },
245 }
246 }
247}
248
249fn streaming_infer_schema(path: &Path) -> Result<(Schema, usize)> {
254 let buf = scanner::buf_reader_for_file(path)?;
255 let top = scanner::open_top_level(buf)?;
256 match top {
257 scanner::TopLevel::Collection {
258 mut reader,
259 header_keys,
260 } => {
261 let crs = crs_from_header(&header_keys);
262 let mut fields = FieldsAccumulator::new();
263 let mut geom_kind: Option<GeometryType> = None;
264 let mut count: usize = 0;
265 loop {
266 let val = scanner::next_feature_value(&mut reader)?;
267 let Some(val) = val else { break };
268 let raw = parse_feature_or_geometry(&val)?;
269 fields.observe(raw.properties.as_ref());
270 if let Some(g) = &raw.geometry {
271 let k = geom_type_of(g);
272 match geom_kind {
273 None => geom_kind = Some(k),
274 Some(existing) if existing == k => {}
275 Some(_) => geom_kind = Some(GeometryType::GeometryCollection),
276 }
277 }
278 count += 1;
279 }
280 let fields = fields.finalize();
281 let geom_field = geom_kind.map(|k| GeomField::new("geometry", k));
282 Ok((Schema::new(fields, geom_field, crs), count))
283 }
284 scanner::TopLevel::BareFeature(v) | scanner::TopLevel::BareGeometry(v) => {
285 let (schema, features) = build_eager_from_root(v)?;
289 let count = features.len();
296 let _ = features; Ok((schema, count))
301 }
302 }
303}
304
305fn build_eager_from_root(root: Json) -> Result<(Schema, Vec<Feature>)> {
310 let obj = root
311 .as_object()
312 .ok_or_else(|| GeoJsonError::malformed("GeoJSON root must be a JSON object"))?;
313 let ty = obj
314 .get("type")
315 .and_then(Json::as_str)
316 .ok_or_else(|| GeoJsonError::malformed("GeoJSON root missing 'type'"))?;
317 let crs = crs_from_header(obj);
318
319 let raw_features: Vec<RawFeature> = match ty {
320 "FeatureCollection" => {
321 let arr = obj
322 .get("features")
323 .and_then(Json::as_array)
324 .ok_or_else(|| GeoJsonError::malformed("FeatureCollection missing 'features'"))?;
325 arr.iter()
326 .map(parse_feature_or_geometry)
327 .collect::<Result<Vec<_>>>()?
328 }
329 "Feature" => vec![parse_feature(obj)?],
330 "Point" | "LineString" | "Polygon" | "MultiPoint" | "MultiLineString" | "MultiPolygon"
331 | "GeometryCollection" => vec![RawFeature {
332 fid: None,
333 geometry: Some(geom_from_json(&root)?),
334 properties: None,
335 }],
336 other => {
337 return Err(GeoJsonError::unsupported(format!(
338 "top-level type '{other}'"
339 )))
340 }
341 };
342
343 let mut acc = FieldsAccumulator::new();
344 for f in &raw_features {
345 acc.observe(f.properties.as_ref());
346 }
347 let fields = acc.finalize();
348 let geom_kind = detect_geom_kind(&raw_features);
349 let geom_field = geom_kind.map(|k| GeomField::new("geometry", k));
350 let schema = Schema::new(fields.clone(), geom_field, crs);
351
352 let features = raw_features
353 .into_iter()
354 .enumerate()
355 .map(|(i, raw)| {
356 let attrs: Vec<Value> = fields
357 .iter()
358 .map(|f| match raw.properties.as_ref() {
359 Some(props) => json_to_value(props.get(&f.name), f.ty),
360 None => Value::Null,
361 })
362 .collect();
363 Feature::new(raw.fid.or(Some(i as i64)), raw.geometry, attrs)
364 })
365 .collect();
366
367 Ok((schema, features))
368}
369
370fn build_feature_from_value(v: &Json, schema: &Schema) -> Result<Feature> {
374 let raw = parse_feature_or_geometry(v)?;
375 let attrs: Vec<Value> = schema
376 .fields
377 .iter()
378 .map(|f| match raw.properties.as_ref() {
379 Some(props) => json_to_value(props.get(&f.name), f.ty),
380 None => Value::Null,
381 })
382 .collect();
383 Ok(Feature::new(raw.fid, raw.geometry, attrs))
384}
385
/// A feature as parsed from JSON, before its properties are projected onto
/// the fields of a schema.
#[derive(Debug)]
struct RawFeature {
    // Feature id taken from the JSON `id` member, when it converts to an i64.
    fid: Option<i64>,
    // Parsed geometry; `None` when the feature has a null or absent geometry.
    geometry: Option<Geometry>,
    // Copy of the JSON `properties` object; `None` when null or absent.
    properties: Option<JsonMap<String, Json>>,
}
392
393fn parse_feature_or_geometry(v: &Json) -> Result<RawFeature> {
394 let obj = v
395 .as_object()
396 .ok_or_else(|| GeoJsonError::malformed("feature must be a JSON object"))?;
397 let ty = obj
398 .get("type")
399 .and_then(Json::as_str)
400 .ok_or_else(|| GeoJsonError::malformed("feature missing 'type'"))?;
401 match ty {
402 "Feature" => parse_feature(obj),
403 "Point" | "LineString" | "Polygon" | "MultiPoint" | "MultiLineString" | "MultiPolygon"
404 | "GeometryCollection" => Ok(RawFeature {
405 fid: None,
406 geometry: Some(geom_from_json(v)?),
407 properties: None,
408 }),
409 other => Err(GeoJsonError::unsupported(format!(
410 "feature-array element type '{other}'"
411 ))),
412 }
413}
414
415fn parse_feature(obj: &JsonMap<String, Json>) -> Result<RawFeature> {
416 let geometry = match obj.get("geometry") {
417 Some(Json::Null) | None => None,
418 Some(other) => Some(geom_from_json(other)?),
419 };
420 let properties = match obj.get("properties") {
421 Some(Json::Null) | None => None,
422 Some(Json::Object(map)) => Some(map.clone()),
423 Some(_) => {
424 return Err(GeoJsonError::malformed(
425 "feature 'properties' must be object or null",
426 ))
427 }
428 };
429 let fid = obj.get("id").and_then(json_id_to_i64);
430 Ok(RawFeature {
431 fid,
432 geometry,
433 properties,
434 })
435}
436
437fn json_id_to_i64(j: &Json) -> Option<i64> {
438 if let Some(n) = j.as_i64() {
439 return Some(n);
440 }
441 if let Some(s) = j.as_str() {
442 return s.parse::<i64>().ok();
443 }
444 None
445}
446
447fn detect_geom_kind(features: &[RawFeature]) -> Option<GeometryType> {
448 let mut found: Option<GeometryType> = None;
449 for f in features {
450 if let Some(g) = &f.geometry {
451 let k = geom_type_of(g);
452 match found {
453 None => found = Some(k),
454 Some(existing) if existing == k => {}
455 Some(_) => return Some(GeometryType::GeometryCollection),
456 }
457 }
458 }
459 found
460}
461
462fn geom_type_of(g: &Geometry) -> GeometryType {
463 match g {
464 Geometry::Point(_) => GeometryType::Point,
465 Geometry::LineString(_) => GeometryType::LineString,
466 Geometry::Polygon(_) => GeometryType::Polygon,
467 Geometry::MultiPoint(_) => GeometryType::MultiPoint,
468 Geometry::MultiLineString(_) => GeometryType::MultiLineString,
469 Geometry::MultiPolygon(_) => GeometryType::MultiPolygon,
470 Geometry::GeometryCollection(_) => GeometryType::GeometryCollection,
471 _ => GeometryType::GeometryCollection,
472 }
473}
474
475fn crs_from_header(obj: &JsonMap<String, Json>) -> Crs {
477 if let Some(crs) = obj.get("crs") {
478 if let Some(name) = crs
479 .get("properties")
480 .and_then(|p| p.get("name"))
481 .and_then(Json::as_str)
482 {
483 if let Some(code) = parse_epsg_urn(name) {
484 return Crs::Epsg(code);
485 }
486 if name.contains("CRS84") {
487 return Crs::Epsg(4326);
488 }
489 return Crs::Wkt(name.to_string());
490 }
491 }
492 Crs::Epsg(4326)
493}
494
/// Extracts the numeric EPSG code from a CRS name such as `EPSG:4326`,
/// `urn:ogc:def:crs:EPSG::3857` or `http://www.opengis.net/def/crs/EPSG/0/4326`.
///
/// The text after the last case-insensitive occurrence of `epsg` is split on
/// `:` and `/`, and the code is read from the leading digits of the last
/// non-empty segment. Earlier segments, such as the version in
/// `urn:ogc:def:crs:EPSG:6.6:4326`, are ignored rather than merged into the
/// code.
///
/// Returns `None` when `s` does not mention EPSG, when the last segment does
/// not start with a digit, or when the number does not fit in a `u32`.
fn parse_epsg_urn(s: &str) -> Option<u32> {
    let lower = s.to_ascii_lowercase();
    let idx = lower.rfind("epsg")?;
    // ASCII lowercasing keeps byte offsets and leaves digits untouched, so
    // the lowercased copy can be sliced directly.
    let tail = &lower[idx + "epsg".len()..];

    let segment = tail
        .rsplit(|c: char| matches!(c, ':' | '/'))
        .map(str::trim)
        .find(|part| !part.is_empty())?;

    let digits_end = segment
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(segment.len());
    segment[..digits_end].parse().ok()
}
502
// Unit tests for the reader: in-memory parsing through `from_bytes`, plus one
// comparison of the file-backed streaming path against the in-memory path.
#[cfg(test)]
mod tests {
    use super::*;
    use geonative_core::ValueType;

    // A two-feature collection yields two features, two fields and keeps ids.
    #[test]
    fn reads_feature_collection() {
        let json = br#"
        {
            "type": "FeatureCollection",
            "features": [
                { "type": "Feature", "id": 1, "geometry": {"type":"Point","coordinates":[1,2]}, "properties": {"name": "a", "rank": 10}},
                { "type": "Feature", "id": 2, "geometry": {"type":"Point","coordinates":[3,4]}, "properties": {"name": "b", "rank": 20}}
            ]
        }"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.feature_count(), 2);
        assert_eq!(r.schema().fields.len(), 2);
        assert_eq!(r.features()[0].fid, Some(1));
    }

    // A root-level Feature is treated as a one-feature document.
    #[test]
    fn reads_bare_feature() {
        let json = br#"{"type":"Feature","geometry":{"type":"Point","coordinates":[1,2]},"properties":{}}"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.feature_count(), 1);
    }

    // A root-level geometry is wrapped into a single feature.
    #[test]
    fn reads_bare_geometry() {
        let json = br#"{"type":"Point","coordinates":[10,20]}"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.feature_count(), 1);
        assert!(r.features()[0].geometry.is_some());
    }

    // Differing geometry kinds make the schema report GeometryCollection.
    #[test]
    fn mixed_geometry_kinds_become_collection() {
        let json = br#"{
            "type":"FeatureCollection",
            "features":[
                {"type":"Feature","geometry":{"type":"Point","coordinates":[1,2]},"properties":{}},
                {"type":"Feature","geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]},"properties":{}}
            ]
        }"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(
            r.schema().geometry.as_ref().unwrap().kind,
            GeometryType::GeometryCollection
        );
    }

    // A top-level `crs` member naming an EPSG URN sets the schema CRS.
    #[test]
    fn honours_legacy_epsg_urn() {
        let json = br#"{
            "type":"FeatureCollection",
            "crs":{"type":"name","properties":{"name":"urn:ogc:def:crs:EPSG::3857"}},
            "features":[]
        }"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.schema().crs, Crs::Epsg(3857));
    }

    // Without a `crs` member the CRS is EPSG:4326.
    #[test]
    fn defaults_to_epsg_4326() {
        let json = br#"{"type":"FeatureCollection","features":[]}"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.schema().crs, Crs::Epsg(4326));
    }

    // A feature with `"geometry": null` is accepted and has no geometry.
    #[test]
    fn null_geometry_allowed() {
        let json = br#"{
            "type":"FeatureCollection",
            "features":[
                {"type":"Feature","geometry":null,"properties":{"x":1}}
            ]
        }"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.feature_count(), 1);
        assert!(r.features()[0].geometry.is_none());
    }

    // Roots that are not JSON objects are rejected.
    #[test]
    fn rejects_non_object_root() {
        assert!(GeoJsonReader::from_bytes(b"[]").is_err());
        assert!(GeoJsonReader::from_bytes(b"42").is_err());
    }

    // An object root without a `type` member is rejected.
    #[test]
    fn rejects_missing_type() {
        assert!(GeoJsonReader::from_bytes(b"{}").is_err());
    }

    // A numeric string id is converted to an integer feature id.
    #[test]
    fn string_id_parses_to_fid() {
        let json = br#"{"type":"Feature","id":"42","geometry":null,"properties":{}}"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.features()[0].fid, Some(42));
    }

    // A field seen as both integer and float becomes Float64, and the integer
    // value is stored as a float.
    #[test]
    fn schema_widens_int_to_float64() {
        let json = br#"{
            "type":"FeatureCollection",
            "features":[
                {"type":"Feature","geometry":null,"properties":{"v":1}},
                {"type":"Feature","geometry":null,"properties":{"v":2.5}}
            ]
        }"#;
        let r = GeoJsonReader::from_bytes(json).unwrap();
        assert_eq!(r.schema().fields[0].ty, ValueType::Float64);
        match &r.features()[0].attributes[0] {
            Value::Float64(n) => assert_eq!(*n, 1.0),
            other => panic!("expected Float64, got {other:?}"),
        }
    }

    // Reading the same document from a file through `open` gives the same
    // feature count, field count, CRS and first-feature attributes as the
    // in-memory path. Feature ids and geometries are not compared here.
    #[test]
    fn streaming_open_on_file_matches_eager_results() {
        let json = br#"{"type":"FeatureCollection","features":[
            {"type":"Feature","id":7,"geometry":{"type":"Point","coordinates":[1,2]},"properties":{"name":"a","rank":10}},
            {"type":"Feature","id":8,"geometry":{"type":"Point","coordinates":[3,4]},"properties":{"name":"b","rank":20}}
        ]}"#;
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), json).unwrap();

        let eager = GeoJsonReader::from_bytes(json).unwrap();
        let streaming = GeoJsonReader::open(tmp.path()).unwrap();

        assert_eq!(streaming.feature_count(), eager.feature_count());
        assert_eq!(streaming.schema().fields.len(), eager.schema().fields.len());
        assert_eq!(streaming.schema().crs, eager.schema().crs);

        let streamed: Vec<Feature> = streaming
            .into_features()
            .collect::<Result<Vec<_>>>()
            .unwrap();
        assert_eq!(streamed.len(), eager.features().len());
        assert_eq!(streamed[0].attributes, eager.features()[0].attributes);
    }
}