1use std::str::FromStr;
18
19use datafusion_common::{stats::Precision, ColumnStatistics, DataFusionError, Result, ScalarValue};
20use sedona_common::sedona_internal_datafusion_err;
21use sedona_geometry::interval::{Interval, IntervalTrait};
22use sedona_geometry::{
23 bounding_box::BoundingBox,
24 types::{GeometryTypeAndDimensions, GeometryTypeAndDimensionsSet},
25};
26use serde::{Deserialize, Serialize};
27
28#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
37pub struct GeoStatistics {
38 bbox: Option<BoundingBox>, geometry_types: Option<GeometryTypeAndDimensionsSet>, total_geometries: Option<i64>, total_size_bytes: Option<i64>, total_points: Option<i64>, puntal_count: Option<i64>, lineal_count: Option<i64>, polygonal_count: Option<i64>, collection_count: Option<i64>, total_envelope_width: Option<f64>, total_envelope_height: Option<f64>, }
57
58impl GeoStatistics {
59 pub const UNSPECIFIED: GeoStatistics = Self {
61 bbox: None,
62 geometry_types: None,
63 total_geometries: None,
64 total_size_bytes: None,
65 total_points: None,
66 puntal_count: None,
67 lineal_count: None,
68 polygonal_count: None,
69 collection_count: None,
70 total_envelope_width: None,
71 total_envelope_height: None,
72 };
73
74 pub fn unspecified() -> Self {
76 Self::UNSPECIFIED.clone()
77 }
78
79 pub fn empty() -> Self {
81 Self {
82 bbox: Some(BoundingBox::xy(Interval::empty(), Interval::empty())),
83 geometry_types: Some(GeometryTypeAndDimensionsSet::new()), total_geometries: Some(0), total_size_bytes: Some(0), total_points: Some(0), puntal_count: Some(0), lineal_count: Some(0), polygonal_count: Some(0), collection_count: Some(0), total_envelope_width: Some(0.0), total_envelope_height: Some(0.0), }
94 }
95
96 pub fn with_bbox(self, bbox: Option<BoundingBox>) -> Self {
98 Self { bbox, ..self }
99 }
100
101 pub fn with_geometry_types(self, types: Option<GeometryTypeAndDimensionsSet>) -> Self {
103 Self {
104 geometry_types: types,
105 ..self
106 }
107 }
108
109 pub fn bbox(&self) -> Option<&BoundingBox> {
111 self.bbox.as_ref()
112 }
113
114 pub fn geometry_types(&self) -> Option<&GeometryTypeAndDimensionsSet> {
116 self.geometry_types.as_ref()
117 }
118
119 pub fn total_geometries(&self) -> Option<i64> {
121 self.total_geometries
122 }
123
124 pub fn total_size_bytes(&self) -> Option<i64> {
126 self.total_size_bytes
127 }
128
129 pub fn total_points(&self) -> Option<i64> {
131 self.total_points
132 }
133
134 pub fn puntal_count(&self) -> Option<i64> {
136 self.puntal_count
137 }
138
139 pub fn lineal_count(&self) -> Option<i64> {
141 self.lineal_count
142 }
143
144 pub fn polygonal_count(&self) -> Option<i64> {
146 self.polygonal_count
147 }
148
149 pub fn collection_count(&self) -> Option<i64> {
151 self.collection_count
152 }
153
154 pub fn total_envelope_width(&self) -> Option<f64> {
156 self.total_envelope_width
157 }
158
159 pub fn total_envelope_height(&self) -> Option<f64> {
161 self.total_envelope_height
162 }
163
164 pub fn mean_envelope_width(&self) -> Option<f64> {
166 match (self.total_envelope_width, self.total_geometries) {
167 (Some(width), Some(count)) if count > 0 => Some(width / count as f64),
168 _ => None,
169 }
170 }
171
172 pub fn mean_envelope_height(&self) -> Option<f64> {
174 match (self.total_envelope_height, self.total_geometries) {
175 (Some(height), Some(count)) if count > 0 => Some(height / count as f64),
176 _ => None,
177 }
178 }
179
180 pub fn mean_envelope_area(&self) -> Option<f64> {
182 match (self.mean_envelope_width(), self.mean_envelope_height()) {
183 (Some(width), Some(height)) => Some(width * height),
184 _ => None,
185 }
186 }
187
188 pub fn mean_size_bytes(&self) -> Option<f64> {
190 match (self.total_size_bytes, self.total_geometries) {
191 (Some(bytes), Some(count)) if count > 0 => Some(bytes as f64 / count as f64),
192 _ => None,
193 }
194 }
195
196 pub fn mean_points_per_geometry(&self) -> Option<f64> {
198 match (self.total_points, self.total_geometries) {
199 (Some(points), Some(count)) if count > 0 => Some(points as f64 / count as f64),
200 _ => None,
201 }
202 }
203
204 pub fn with_total_geometries(self, count: i64) -> Self {
206 Self {
207 total_geometries: Some(count),
208 ..self
209 }
210 }
211
212 pub fn with_total_size_bytes(self, bytes: i64) -> Self {
214 Self {
215 total_size_bytes: Some(bytes),
216 ..self
217 }
218 }
219
220 pub fn with_total_points(self, points: i64) -> Self {
222 Self {
223 total_points: Some(points),
224 ..self
225 }
226 }
227
228 pub fn with_puntal_count(self, count: i64) -> Self {
230 Self {
231 puntal_count: Some(count),
232 ..self
233 }
234 }
235
236 pub fn with_lineal_count(self, count: i64) -> Self {
238 Self {
239 lineal_count: Some(count),
240 ..self
241 }
242 }
243
244 pub fn with_polygonal_count(self, count: i64) -> Self {
246 Self {
247 polygonal_count: Some(count),
248 ..self
249 }
250 }
251
252 pub fn with_collection_count(self, count: i64) -> Self {
254 Self {
255 collection_count: Some(count),
256 ..self
257 }
258 }
259
260 pub fn with_total_envelope_width(self, width: f64) -> Self {
262 Self {
263 total_envelope_width: Some(width),
264 ..self
265 }
266 }
267
268 pub fn with_total_envelope_height(self, height: f64) -> Self {
270 Self {
271 total_envelope_height: Some(height),
272 ..self
273 }
274 }
275
276 pub fn merge(&mut self, other: &Self) {
278 if let Some(other_bbox) = &other.bbox {
280 match &mut self.bbox {
281 Some(bbox) => bbox.update_box(other_bbox),
282 None => self.bbox = Some(other_bbox.clone()),
283 }
284 }
285
286 if let Some(other_types) = &other.geometry_types {
288 match &mut self.geometry_types {
289 Some(types) => {
290 types.merge(other_types);
291 }
292 None => self.geometry_types = Some(other_types.clone()),
293 }
294 }
295
296 self.total_geometries =
298 Self::merge_option_add(self.total_geometries, other.total_geometries);
299 self.total_size_bytes =
300 Self::merge_option_add(self.total_size_bytes, other.total_size_bytes);
301 self.total_points = Self::merge_option_add(self.total_points, other.total_points);
302
303 self.puntal_count = Self::merge_option_add(self.puntal_count, other.puntal_count);
305 self.lineal_count = Self::merge_option_add(self.lineal_count, other.lineal_count);
306 self.polygonal_count = Self::merge_option_add(self.polygonal_count, other.polygonal_count);
307 self.collection_count =
308 Self::merge_option_add(self.collection_count, other.collection_count);
309
310 self.total_envelope_width =
312 Self::merge_option_add_f64(self.total_envelope_width, other.total_envelope_width);
313 self.total_envelope_height =
314 Self::merge_option_add_f64(self.total_envelope_height, other.total_envelope_height);
315 }
316
317 fn merge_option_add(a: Option<i64>, b: Option<i64>) -> Option<i64> {
319 match (a, b) {
320 (Some(a_val), Some(b_val)) => Some(a_val + b_val),
321 _ => None,
322 }
323 }
324
325 fn merge_option_add_f64(a: Option<f64>, b: Option<f64>) -> Option<f64> {
327 match (a, b) {
328 (Some(a_val), Some(b_val)) => Some(a_val + b_val),
329 _ => None,
330 }
331 }
332
333 pub fn try_from_column_statistics(stats: &ColumnStatistics) -> Result<Option<Self>> {
340 let scalar = match &stats.sum_value {
341 Precision::Exact(value) => value,
342 _ => {
343 return Ok(None);
344 }
345 };
346
347 if let ScalarValue::Binary(Some(serialized)) = scalar {
348 serde_json::from_slice(serialized).map_err(|e| DataFusionError::External(Box::new(e)))
349 } else {
350 Ok(None)
351 }
352 }
353
354 pub fn to_column_statistics(&self) -> Result<ColumnStatistics> {
359 let serialized =
360 serde_json::to_vec(self).map_err(|e| DataFusionError::External(Box::new(e)))?;
361 Ok(ColumnStatistics::new_unknown()
362 .with_sum_value(Precision::Exact(ScalarValue::Binary(Some(serialized)))))
363 }
364
365 pub fn try_with_str_geometry_types(self, geometry_types: Option<&[&str]>) -> Result<Self> {
370 match geometry_types {
371 Some(strings) => {
372 let mut new_geometry_types = GeometryTypeAndDimensionsSet::new();
373 for string in strings {
374 let type_and_dim = GeometryTypeAndDimensions::from_str(string)
375 .map_err(|e| DataFusionError::External(Box::new(e)))?;
376 new_geometry_types.insert_or_ignore(&type_and_dim);
377 }
378
379 Ok(Self {
380 geometry_types: Some(new_geometry_types),
381 ..self
382 })
383 }
384 None => Ok(Self {
385 geometry_types: None,
386 ..self
387 }),
388 }
389 }
390
391 pub fn to_scalar_value(&self) -> Result<ScalarValue> {
393 let serialized = serde_json::to_vec(self).map_err(|e| {
395 sedona_internal_datafusion_err!("Failed to serialize GeoStatistics: {e}")
396 })?;
397
398 Ok(ScalarValue::Binary(Some(serialized)))
399 }
400}
401
#[cfg(test)]
mod test {
    use geo_traits::Dimensions;
    use sedona_geometry::types::GeometryTypeId;

    use super::*;

    /// Unspecified statistics expose `None` everywhere and survive a round
    /// trip through `ColumnStatistics`.
    #[test]
    fn unspecified() {
        let stats = GeoStatistics::unspecified();
        assert_eq!(stats.bbox(), None);
        assert_eq!(stats.geometry_types(), None);
        assert_eq!(stats.total_geometries(), None);
        assert_eq!(stats.total_size_bytes(), None);
        assert_eq!(stats.total_points(), None);
        assert_eq!(stats.puntal_count(), None);
        assert_eq!(stats.lineal_count(), None);
        assert_eq!(stats.polygonal_count(), None);
        assert_eq!(stats.collection_count(), None);
        assert_eq!(stats.total_envelope_width(), None);
        assert_eq!(stats.total_envelope_height(), None);

        let regular_stats = stats.to_column_statistics().unwrap();
        assert_eq!(
            GeoStatistics::try_from_column_statistics(&regular_stats)
                .unwrap()
                .unwrap(),
            stats
        );
    }

    /// Setting a bounding box leaves the other fields of `empty()` untouched.
    #[test]
    fn specified_bbox() {
        let bbox = BoundingBox::xy((0.0, 1.0), (2.0, 3.0));
        let stats = GeoStatistics::empty().with_bbox(Some(bbox.clone()));
        assert_eq!(stats.bbox(), Some(&bbox));
        assert_eq!(
            stats.geometry_types(),
            Some(&GeometryTypeAndDimensionsSet::new())
        );

        let regular_stats = stats.to_column_statistics().unwrap();
        assert_eq!(
            GeoStatistics::try_from_column_statistics(&regular_stats)
                .unwrap()
                .unwrap(),
            stats
        );

        // Passing None clears the bounding box
        let stats_with_none = GeoStatistics::empty().with_bbox(None);
        assert_eq!(stats_with_none.bbox(), None);
    }

    /// Setting geometry types leaves the other fields of `empty()` untouched.
    #[test]
    fn specified_geometry_types() {
        let mut types = GeometryTypeAndDimensionsSet::new();
        types
            .insert(&GeometryTypeAndDimensions::new(
                GeometryTypeId::Polygon,
                Dimensions::Xy,
            ))
            .unwrap();

        let stats = GeoStatistics::empty().with_geometry_types(Some(types.clone()));
        assert_eq!(stats.geometry_types(), Some(&types));
        assert_eq!(
            stats.bbox(),
            Some(&BoundingBox::xy(Interval::empty(), Interval::empty()))
        );

        let regular_stats = stats.to_column_statistics().unwrap();
        assert_eq!(
            GeoStatistics::try_from_column_statistics(&regular_stats)
                .unwrap()
                .unwrap(),
            stats
        );

        // Passing None clears the geometry types
        let stats_with_none = GeoStatistics::empty().with_geometry_types(None);
        assert_eq!(stats_with_none.geometry_types(), None);
    }

    /// Geometry types can be supplied as strings.
    #[test]
    fn specified_geometry_types_by_name() {
        let stats = GeoStatistics::empty()
            .try_with_str_geometry_types(Some(&["polygon", "point"]))
            .unwrap();

        let mut expected_types = GeometryTypeAndDimensionsSet::new();
        expected_types
            .insert(&GeometryTypeAndDimensions::new(
                GeometryTypeId::Polygon,
                Dimensions::Xy,
            ))
            .unwrap();
        expected_types
            .insert(&GeometryTypeAndDimensions::new(
                GeometryTypeId::Point,
                Dimensions::Xy,
            ))
            .unwrap();

        assert_eq!(stats.geometry_types(), Some(&expected_types));
        assert_eq!(
            stats.bbox(),
            Some(&BoundingBox::xy(Interval::empty(), Interval::empty()))
        );

        let regular_stats = stats.to_column_statistics().unwrap();
        assert_eq!(
            GeoStatistics::try_from_column_statistics(&regular_stats)
                .unwrap()
                .unwrap(),
            stats
        );
    }

    /// Column statistics that do not carry a serialized payload yield `None`;
    /// an invalid payload is an error.
    #[test]
    fn from_non_geometry_stats() {
        // No sum_value at all
        let stats = ColumnStatistics::new_unknown();
        assert!(GeoStatistics::try_from_column_statistics(&stats)
            .unwrap()
            .is_none());

        // A null binary sum_value
        let stats = ColumnStatistics::new_unknown()
            .with_sum_value(Precision::Exact(ScalarValue::Binary(None)));
        assert!(GeoStatistics::try_from_column_statistics(&stats)
            .unwrap()
            .is_none());

        // An empty binary sum_value is not valid JSON
        let stats = ColumnStatistics::new_unknown()
            .with_sum_value(Precision::Exact(ScalarValue::Binary(Some(vec![]))));
        let err = GeoStatistics::try_from_column_statistics(&stats).unwrap_err();
        assert_eq!(
            err.message(),
            "EOF while parsing a value at line 1 column 0"
        );
    }

    /// Builders, getters and derived means for the extended statistics.
    #[test]
    fn test_extended_stats() {
        let stats = GeoStatistics::empty()
            .with_total_geometries(100)
            .with_total_size_bytes(10000)
            .with_total_points(5000)
            .with_puntal_count(20)
            .with_lineal_count(30)
            .with_polygonal_count(40)
            .with_collection_count(10)
            .with_total_envelope_width(500.0)
            .with_total_envelope_height(300.0);

        // Getters
        assert_eq!(stats.total_geometries(), Some(100));
        assert_eq!(stats.total_size_bytes(), Some(10000));
        assert_eq!(stats.total_points(), Some(5000));
        assert_eq!(stats.puntal_count(), Some(20));
        assert_eq!(stats.lineal_count(), Some(30));
        assert_eq!(stats.polygonal_count(), Some(40));
        assert_eq!(stats.collection_count(), Some(10));
        assert_eq!(stats.total_envelope_width(), Some(500.0));
        assert_eq!(stats.total_envelope_height(), Some(300.0));

        // Derived means
        assert_eq!(stats.mean_size_bytes(), Some(100.0));
        assert_eq!(stats.mean_points_per_geometry(), Some(50.0));
        assert_eq!(stats.mean_envelope_width(), Some(5.0));
        assert_eq!(stats.mean_envelope_height(), Some(3.0));
        assert_eq!(stats.mean_envelope_area(), Some(15.0));

        // Serialization round trip
        let column_stats = stats.to_column_statistics().unwrap();
        let deserialized = GeoStatistics::try_from_column_statistics(&column_stats)
            .unwrap()
            .unwrap();
        assert_eq!(deserialized, stats);
    }

    /// Merging two fully populated statistics sums every count and total.
    #[test]
    fn test_merge_extended_stats() {
        let stats1 = GeoStatistics::empty()
            .with_total_geometries(50)
            .with_total_size_bytes(5000)
            .with_total_points(2500)
            .with_puntal_count(10)
            .with_lineal_count(15)
            .with_polygonal_count(20)
            .with_collection_count(5)
            .with_total_envelope_width(250.0)
            .with_total_envelope_height(150.0);

        let stats2 = GeoStatistics::empty()
            .with_total_geometries(50)
            .with_total_size_bytes(5000)
            .with_total_points(2500)
            .with_puntal_count(10)
            .with_lineal_count(15)
            .with_polygonal_count(20)
            .with_collection_count(5)
            .with_total_envelope_width(250.0)
            .with_total_envelope_height(150.0);

        let mut merged = stats1.clone();
        merged.merge(&stats2);

        // Merged totals
        assert_eq!(merged.total_geometries(), Some(100));
        assert_eq!(merged.total_size_bytes(), Some(10000));
        assert_eq!(merged.total_points(), Some(5000));
        assert_eq!(merged.puntal_count(), Some(20));
        assert_eq!(merged.lineal_count(), Some(30));
        assert_eq!(merged.polygonal_count(), Some(40));
        assert_eq!(merged.collection_count(), Some(10));
        assert_eq!(merged.total_envelope_width(), Some(500.0));
        assert_eq!(merged.total_envelope_height(), Some(300.0));

        // Serialization round trip
        let column_stats = merged.to_column_statistics().unwrap();
        let deserialized = GeoStatistics::try_from_column_statistics(&column_stats)
            .unwrap()
            .unwrap();
        assert_eq!(deserialized, merged);
    }

    /// Merging statistics that each set different fields: because both sides
    /// start from `empty()`, the fields left alone are zero rather than `None`.
    #[test]
    fn test_partial_merge() {
        let stats1 = GeoStatistics::empty()
            .with_total_geometries(50)
            .with_total_size_bytes(5000);

        let stats2 = GeoStatistics::empty()
            .with_puntal_count(20)
            .with_lineal_count(30);

        let mut merged = stats1.clone();
        merged.merge(&stats2);

        assert_eq!(merged.total_geometries(), Some(50));
        assert_eq!(merged.total_size_bytes(), Some(5000));
        assert_eq!(merged.puntal_count(), Some(20));
        assert_eq!(merged.lineal_count(), Some(30));
        assert_eq!(merged.polygonal_count(), Some(0));

        // Serialization round trip
        let column_stats = merged.to_column_statistics().unwrap();
        let deserialized = GeoStatistics::try_from_column_statistics(&column_stats)
            .unwrap()
            .unwrap();
        assert_eq!(deserialized, merged);
    }
}