1use std::str::FromStr;
18
19use datafusion_common::{stats::Precision, ColumnStatistics, DataFusionError, Result, ScalarValue};
20use sedona_geometry::interval::{Interval, IntervalTrait};
21use sedona_geometry::{
22 bounding_box::BoundingBox,
23 types::{GeometryTypeAndDimensions, GeometryTypeAndDimensionsSet},
24};
25use serde::{Deserialize, Serialize};
26
27#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct GeoStatistics {
37 bbox: Option<BoundingBox>, geometry_types: Option<GeometryTypeAndDimensionsSet>, total_geometries: Option<i64>, total_size_bytes: Option<i64>, total_points: Option<i64>, puntal_count: Option<i64>, lineal_count: Option<i64>, polygonal_count: Option<i64>, collection_count: Option<i64>, total_envelope_width: Option<f64>, total_envelope_height: Option<f64>, }
56
57impl GeoStatistics {
58 pub const UNSPECIFIED: GeoStatistics = Self {
60 bbox: None,
61 geometry_types: None,
62 total_geometries: None,
63 total_size_bytes: None,
64 total_points: None,
65 puntal_count: None,
66 lineal_count: None,
67 polygonal_count: None,
68 collection_count: None,
69 total_envelope_width: None,
70 total_envelope_height: None,
71 };
72
73 pub fn unspecified() -> Self {
75 Self::UNSPECIFIED.clone()
76 }
77
78 pub fn empty() -> Self {
80 Self {
81 bbox: Some(BoundingBox::xy(Interval::empty(), Interval::empty())),
82 geometry_types: Some(GeometryTypeAndDimensionsSet::new()), total_geometries: Some(0), total_size_bytes: Some(0), total_points: Some(0), puntal_count: Some(0), lineal_count: Some(0), polygonal_count: Some(0), collection_count: Some(0), total_envelope_width: Some(0.0), total_envelope_height: Some(0.0), }
93 }
94
95 pub fn with_bbox(self, bbox: Option<BoundingBox>) -> Self {
97 Self { bbox, ..self }
98 }
99
100 pub fn with_geometry_types(self, types: Option<GeometryTypeAndDimensionsSet>) -> Self {
102 Self {
103 geometry_types: types,
104 ..self
105 }
106 }
107
108 pub fn bbox(&self) -> Option<&BoundingBox> {
110 self.bbox.as_ref()
111 }
112
113 pub fn geometry_types(&self) -> Option<&GeometryTypeAndDimensionsSet> {
115 self.geometry_types.as_ref()
116 }
117
118 pub fn total_geometries(&self) -> Option<i64> {
120 self.total_geometries
121 }
122
123 pub fn total_size_bytes(&self) -> Option<i64> {
125 self.total_size_bytes
126 }
127
128 pub fn total_points(&self) -> Option<i64> {
130 self.total_points
131 }
132
133 pub fn puntal_count(&self) -> Option<i64> {
135 self.puntal_count
136 }
137
138 pub fn lineal_count(&self) -> Option<i64> {
140 self.lineal_count
141 }
142
143 pub fn polygonal_count(&self) -> Option<i64> {
145 self.polygonal_count
146 }
147
148 pub fn collection_count(&self) -> Option<i64> {
150 self.collection_count
151 }
152
153 pub fn total_envelope_width(&self) -> Option<f64> {
155 self.total_envelope_width
156 }
157
158 pub fn total_envelope_height(&self) -> Option<f64> {
160 self.total_envelope_height
161 }
162
163 pub fn mean_envelope_width(&self) -> Option<f64> {
165 match (self.total_envelope_width, self.total_geometries) {
166 (Some(width), Some(count)) if count > 0 => Some(width / count as f64),
167 _ => None,
168 }
169 }
170
171 pub fn mean_envelope_height(&self) -> Option<f64> {
173 match (self.total_envelope_height, self.total_geometries) {
174 (Some(height), Some(count)) if count > 0 => Some(height / count as f64),
175 _ => None,
176 }
177 }
178
179 pub fn mean_envelope_area(&self) -> Option<f64> {
181 match (self.mean_envelope_width(), self.mean_envelope_height()) {
182 (Some(width), Some(height)) => Some(width * height),
183 _ => None,
184 }
185 }
186
187 pub fn mean_size_bytes(&self) -> Option<f64> {
189 match (self.total_size_bytes, self.total_geometries) {
190 (Some(bytes), Some(count)) if count > 0 => Some(bytes as f64 / count as f64),
191 _ => None,
192 }
193 }
194
195 pub fn mean_points_per_geometry(&self) -> Option<f64> {
197 match (self.total_points, self.total_geometries) {
198 (Some(points), Some(count)) if count > 0 => Some(points as f64 / count as f64),
199 _ => None,
200 }
201 }
202
203 pub fn with_total_geometries(self, count: i64) -> Self {
205 Self {
206 total_geometries: Some(count),
207 ..self
208 }
209 }
210
211 pub fn with_total_size_bytes(self, bytes: i64) -> Self {
213 Self {
214 total_size_bytes: Some(bytes),
215 ..self
216 }
217 }
218
219 pub fn with_total_points(self, points: i64) -> Self {
221 Self {
222 total_points: Some(points),
223 ..self
224 }
225 }
226
227 pub fn with_puntal_count(self, count: i64) -> Self {
229 Self {
230 puntal_count: Some(count),
231 ..self
232 }
233 }
234
235 pub fn with_lineal_count(self, count: i64) -> Self {
237 Self {
238 lineal_count: Some(count),
239 ..self
240 }
241 }
242
243 pub fn with_polygonal_count(self, count: i64) -> Self {
245 Self {
246 polygonal_count: Some(count),
247 ..self
248 }
249 }
250
251 pub fn with_collection_count(self, count: i64) -> Self {
253 Self {
254 collection_count: Some(count),
255 ..self
256 }
257 }
258
259 pub fn with_total_envelope_width(self, width: f64) -> Self {
261 Self {
262 total_envelope_width: Some(width),
263 ..self
264 }
265 }
266
267 pub fn with_total_envelope_height(self, height: f64) -> Self {
269 Self {
270 total_envelope_height: Some(height),
271 ..self
272 }
273 }
274
275 pub fn merge(&mut self, other: &Self) {
277 if let Some(other_bbox) = &other.bbox {
279 match &mut self.bbox {
280 Some(bbox) => bbox.update_box(other_bbox),
281 None => self.bbox = Some(other_bbox.clone()),
282 }
283 }
284
285 if let Some(other_types) = &other.geometry_types {
287 match &mut self.geometry_types {
288 Some(types) => {
289 types.merge(other_types);
290 }
291 None => self.geometry_types = Some(other_types.clone()),
292 }
293 }
294
295 self.total_geometries =
297 Self::merge_option_add(self.total_geometries, other.total_geometries);
298 self.total_size_bytes =
299 Self::merge_option_add(self.total_size_bytes, other.total_size_bytes);
300 self.total_points = Self::merge_option_add(self.total_points, other.total_points);
301
302 self.puntal_count = Self::merge_option_add(self.puntal_count, other.puntal_count);
304 self.lineal_count = Self::merge_option_add(self.lineal_count, other.lineal_count);
305 self.polygonal_count = Self::merge_option_add(self.polygonal_count, other.polygonal_count);
306 self.collection_count =
307 Self::merge_option_add(self.collection_count, other.collection_count);
308
309 self.total_envelope_width =
311 Self::merge_option_add_f64(self.total_envelope_width, other.total_envelope_width);
312 self.total_envelope_height =
313 Self::merge_option_add_f64(self.total_envelope_height, other.total_envelope_height);
314 }
315
/// Adds two optional integer statistics.
///
/// Returns `Some(a + b)` only when both operands are specified. If either
/// operand is `None`, or the sum does not fit in an `i64`, the result is `None`
/// (unknown) rather than a panic in debug builds or a silently wrapped value in
/// release builds.
fn merge_option_add(a: Option<i64>, b: Option<i64>) -> Option<i64> {
    a.zip(b).and_then(|(lhs, rhs)| lhs.checked_add(rhs))
}
323
/// Adds two optional floating-point statistics.
///
/// Returns `Some(a + b)` only when both operands are specified; otherwise `None`.
fn merge_option_add_f64(a: Option<f64>, b: Option<f64>) -> Option<f64> {
    a.zip(b).map(|(lhs, rhs)| lhs + rhs)
}
331
332 pub fn try_from_column_statistics(stats: &ColumnStatistics) -> Result<Option<Self>> {
339 let scalar = match &stats.sum_value {
340 Precision::Exact(value) => value,
341 _ => {
342 return Ok(None);
343 }
344 };
345
346 if let ScalarValue::Binary(Some(serialized)) = scalar {
347 serde_json::from_slice(serialized).map_err(|e| DataFusionError::External(Box::new(e)))
348 } else {
349 Ok(None)
350 }
351 }
352
353 pub fn to_column_statistics(&self) -> Result<ColumnStatistics> {
358 let serialized =
359 serde_json::to_vec(self).map_err(|e| DataFusionError::External(Box::new(e)))?;
360 Ok(ColumnStatistics::new_unknown()
361 .with_sum_value(Precision::Exact(ScalarValue::Binary(Some(serialized)))))
362 }
363
364 pub fn try_with_str_geometry_types(self, geometry_types: Option<&[&str]>) -> Result<Self> {
369 match geometry_types {
370 Some(strings) => {
371 let mut new_geometry_types = GeometryTypeAndDimensionsSet::new();
372 for string in strings {
373 let type_and_dim = GeometryTypeAndDimensions::from_str(string)
374 .map_err(|e| DataFusionError::External(Box::new(e)))?;
375 new_geometry_types.insert_or_ignore(&type_and_dim);
376 }
377
378 Ok(Self {
379 geometry_types: Some(new_geometry_types),
380 ..self
381 })
382 }
383 None => Ok(Self {
384 geometry_types: None,
385 ..self
386 }),
387 }
388 }
389
390 pub fn to_scalar_value(&self) -> Result<ScalarValue> {
392 let serialized = serde_json::to_vec(self).map_err(|e| {
394 DataFusionError::Internal(format!("Failed to serialize GeoStatistics: {e}"))
395 })?;
396
397 Ok(ScalarValue::Binary(Some(serialized)))
398 }
399}
400
401#[cfg(test)]
402mod test {
403 use geo_traits::Dimensions;
404 use sedona_geometry::types::GeometryTypeId;
405
406 use super::*;
407
/// Unspecified statistics expose `None` everywhere and survive a round trip
/// through `ColumnStatistics`.
#[test]
fn unspecified() {
    let stats = GeoStatistics::unspecified();
    assert_eq!(stats.bbox(), None);
    assert_eq!(stats.geometry_types(), None);
    assert_eq!(stats.total_geometries(), None);
    assert_eq!(stats.total_size_bytes(), None);
    assert_eq!(stats.total_points(), None);
    assert_eq!(stats.puntal_count(), None);
    assert_eq!(stats.lineal_count(), None);
    assert_eq!(stats.polygonal_count(), None);
    assert_eq!(stats.collection_count(), None);
    assert_eq!(stats.total_envelope_width(), None);
    assert_eq!(stats.total_envelope_height(), None);

    // Round trip: serialize into column statistics and read back.
    let regular_stats = stats.to_column_statistics().unwrap();
    assert_eq!(
        GeoStatistics::try_from_column_statistics(&regular_stats)
            .unwrap()
            .unwrap(),
        stats
    );
}
431
/// Setting a bounding box only affects the bounding box, round-trips through
/// `ColumnStatistics`, and can be cleared again with `None`.
#[test]
fn specified_bbox() {
    let bbox = BoundingBox::xy((0.0, 1.0), (2.0, 3.0));
    let stats = GeoStatistics::empty().with_bbox(Some(bbox.clone()));
    assert_eq!(stats.bbox(), Some(&bbox));
    // The geometry types from `empty()` must be left as they were.
    assert_eq!(
        stats.geometry_types(),
        Some(&GeometryTypeAndDimensionsSet::new())
    );

    // Round trip: serialize into column statistics and read back.
    let regular_stats = stats.to_column_statistics().unwrap();
    assert_eq!(
        GeoStatistics::try_from_column_statistics(&regular_stats)
            .unwrap()
            .unwrap(),
        stats
    );

    // Passing `None` removes the bounding box.
    let stats_with_none = GeoStatistics::empty().with_bbox(None);
    assert_eq!(stats_with_none.bbox(), None);
}
455
/// Setting geometry types only affects the geometry types, round-trips through
/// `ColumnStatistics`, and can be cleared again with `None`.
#[test]
fn specified_geometry_types() {
    let mut types = GeometryTypeAndDimensionsSet::new();
    types
        .insert(&GeometryTypeAndDimensions::new(
            GeometryTypeId::Polygon,
            Dimensions::Xy,
        ))
        .unwrap();

    let stats = GeoStatistics::empty().with_geometry_types(Some(types.clone()));
    assert_eq!(stats.geometry_types(), Some(&types));
    // The bounding box from `empty()` must be left as it was.
    assert_eq!(
        stats.bbox(),
        Some(&BoundingBox::xy(Interval::empty(), Interval::empty()))
    );

    // Round trip: serialize into column statistics and read back.
    let regular_stats = stats.to_column_statistics().unwrap();
    assert_eq!(
        GeoStatistics::try_from_column_statistics(&regular_stats)
            .unwrap()
            .unwrap(),
        stats
    );

    // Passing `None` removes the geometry types.
    let stats_with_none = GeoStatistics::empty().with_geometry_types(None);
    assert_eq!(stats_with_none.geometry_types(), None);
}
486
/// Geometry types given as strings are parsed into the expected set and the
/// result round-trips through `ColumnStatistics`.
#[test]
fn specified_geometry_types_by_name() {
    let stats = GeoStatistics::empty()
        .try_with_str_geometry_types(Some(&["polygon", "point"]))
        .unwrap();

    let mut expected_types = GeometryTypeAndDimensionsSet::new();
    expected_types
        .insert(&GeometryTypeAndDimensions::new(
            GeometryTypeId::Polygon,
            Dimensions::Xy,
        ))
        .unwrap();
    expected_types
        .insert(&GeometryTypeAndDimensions::new(
            GeometryTypeId::Point,
            Dimensions::Xy,
        ))
        .unwrap();

    assert_eq!(stats.geometry_types(), Some(&expected_types));
    // The bounding box from `empty()` must be left as it was.
    assert_eq!(
        stats.bbox(),
        Some(&BoundingBox::xy(Interval::empty(), Interval::empty()))
    );

    // Round trip: serialize into column statistics and read back.
    let regular_stats = stats.to_column_statistics().unwrap();
    assert_eq!(
        GeoStatistics::try_from_column_statistics(&regular_stats)
            .unwrap()
            .unwrap(),
        stats
    );
}
523
/// Column statistics that carry no geo payload yield `None`; an invalid payload
/// surfaces the JSON parser error.
#[test]
fn from_non_geometry_stats() {
    // No sum value at all.
    let without_sum = ColumnStatistics::new_unknown();
    let parsed = GeoStatistics::try_from_column_statistics(&without_sum).unwrap();
    assert!(parsed.is_none());

    // An exact sum value that is a null binary.
    let null_binary = ColumnStatistics::new_unknown()
        .with_sum_value(Precision::Exact(ScalarValue::Binary(None)));
    let parsed = GeoStatistics::try_from_column_statistics(&null_binary).unwrap();
    assert!(parsed.is_none());

    // An exact binary sum value whose bytes are not valid JSON.
    let empty_payload = ColumnStatistics::new_unknown()
        .with_sum_value(Precision::Exact(ScalarValue::Binary(Some(vec![]))));
    let err = GeoStatistics::try_from_column_statistics(&empty_payload).unwrap_err();
    assert_eq!(
        err.message(),
        "EOF while parsing a value at line 1 column 0"
    );
}
548
/// The numeric setters, getters, and derived means agree with each other, and
/// the populated value round-trips through `ColumnStatistics`.
#[test]
fn test_extended_stats() {
    let populated = GeoStatistics::empty()
        .with_total_geometries(100)
        .with_total_size_bytes(10000)
        .with_total_points(5000)
        .with_puntal_count(20)
        .with_lineal_count(30)
        .with_polygonal_count(40)
        .with_collection_count(10)
        .with_total_envelope_width(500.0)
        .with_total_envelope_height(300.0);

    // Each getter returns exactly what its setter stored.
    assert_eq!(populated.total_geometries(), Some(100));
    assert_eq!(populated.total_size_bytes(), Some(10000));
    assert_eq!(populated.total_points(), Some(5000));
    assert_eq!(populated.puntal_count(), Some(20));
    assert_eq!(populated.lineal_count(), Some(30));
    assert_eq!(populated.polygonal_count(), Some(40));
    assert_eq!(populated.collection_count(), Some(10));
    assert_eq!(populated.total_envelope_width(), Some(500.0));
    assert_eq!(populated.total_envelope_height(), Some(300.0));

    // Derived means are the totals divided by the geometry count.
    assert_eq!(populated.mean_size_bytes(), Some(100.0));
    assert_eq!(populated.mean_points_per_geometry(), Some(50.0));
    assert_eq!(populated.mean_envelope_width(), Some(5.0));
    assert_eq!(populated.mean_envelope_height(), Some(3.0));
    assert_eq!(populated.mean_envelope_area(), Some(15.0));

    // Round trip: serialize into column statistics and read back.
    let as_column_stats = populated.to_column_statistics().unwrap();
    let round_tripped = GeoStatistics::try_from_column_statistics(&as_column_stats)
        .unwrap()
        .unwrap();
    assert_eq!(round_tripped, populated);
}
588
/// Merging two fully populated statistics sums every count and total, and the
/// merged value round-trips through `ColumnStatistics`.
#[test]
fn test_merge_extended_stats() {
    // Both operands of the merge hold the same values, so build one and reuse it.
    let half = GeoStatistics::empty()
        .with_total_geometries(50)
        .with_total_size_bytes(5000)
        .with_total_points(2500)
        .with_puntal_count(10)
        .with_lineal_count(15)
        .with_polygonal_count(20)
        .with_collection_count(5)
        .with_total_envelope_width(250.0)
        .with_total_envelope_height(150.0);
    let other_half = half.clone();

    let mut merged = half.clone();
    merged.merge(&other_half);

    // Every numeric field is the sum of the two inputs.
    assert_eq!(merged.total_geometries(), Some(100));
    assert_eq!(merged.total_size_bytes(), Some(10000));
    assert_eq!(merged.total_points(), Some(5000));
    assert_eq!(merged.puntal_count(), Some(20));
    assert_eq!(merged.lineal_count(), Some(30));
    assert_eq!(merged.polygonal_count(), Some(40));
    assert_eq!(merged.collection_count(), Some(10));
    assert_eq!(merged.total_envelope_width(), Some(500.0));
    assert_eq!(merged.total_envelope_height(), Some(300.0));

    // Round trip: serialize into column statistics and read back.
    let as_column_stats = merged.to_column_statistics().unwrap();
    let round_tripped = GeoStatistics::try_from_column_statistics(&as_column_stats)
        .unwrap()
        .unwrap();
    assert_eq!(round_tripped, merged);
}
636
/// Merging statistics that populate different fields keeps each side's values,
/// because the fields left at `empty()`'s zero contribute nothing to the sums.
#[test]
fn test_partial_merge() {
    let totals_only = GeoStatistics::empty()
        .with_total_geometries(50)
        .with_total_size_bytes(5000);

    let counts_only = GeoStatistics::empty()
        .with_puntal_count(20)
        .with_lineal_count(30);

    let mut merged = totals_only.clone();
    merged.merge(&counts_only);

    // Values set on one side plus zero on the other are unchanged.
    assert_eq!(merged.total_geometries(), Some(50));
    assert_eq!(merged.total_size_bytes(), Some(5000));
    assert_eq!(merged.puntal_count(), Some(20));
    assert_eq!(merged.lineal_count(), Some(30));
    // Zero on both sides stays zero.
    assert_eq!(merged.polygonal_count(), Some(0));

    // Round trip: serialize into column statistics and read back.
    let as_column_stats = merged.to_column_statistics().unwrap();
    let round_tripped = GeoStatistics::try_from_column_statistics(&as_column_stats)
        .unwrap()
        .unwrap();
    assert_eq!(round_tripped, merged);
}
664}