1use arrow_array::{ArrayRef, RecordBatch, RecordBatchReader};
27use arrow_array::{BinaryArray, BinaryViewArray};
28use arrow_array::{Float64Array, Int32Array};
29use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaRef};
30use datafusion_common::Result;
31use geo_types::{
32 Coord, Geometry, GeometryCollection, LineString, MultiLineString, MultiPoint, MultiPolygon,
33 Point, Polygon, Rect,
34};
35use rand::distributions::Uniform;
36use rand::rngs::StdRng;
37use rand::{Rng, SeedableRng};
38use sedona_common::sedona_internal_err;
39use sedona_geometry::types::GeometryTypeId;
40use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY};
41use std::f64::consts::PI;
42use std::sync::Arc;
43use wkb::writer::WriteOptions;
44use wkb::Endianness;
45
46#[derive(Debug, Clone)]
92pub struct RandomPartitionedDataBuilder {
93 pub seed: u64,
94 pub num_partitions: usize,
95 pub batches_per_partition: usize,
96 pub rows_per_batch: usize,
97 sedona_type: SedonaType,
98 null_rate: f64,
99 options: RandomGeometryOptions,
100}
101
102impl Default for RandomPartitionedDataBuilder {
103 fn default() -> Self {
104 let options = RandomGeometryOptions::new();
105
106 Self {
107 seed: 42,
108 num_partitions: 1,
109 batches_per_partition: 1,
110 rows_per_batch: 10,
111 sedona_type: WKB_GEOMETRY,
112 null_rate: 0.0,
113 options,
114 }
115 }
116}
117
118impl RandomPartitionedDataBuilder {
119 pub fn new() -> Self {
136 Self::default()
137 }
138
139 pub fn seed(mut self, seed: u64) -> Self {
148 self.seed = seed;
149 self
150 }
151
152 pub fn num_partitions(mut self, num_partitions: usize) -> Self {
161 self.num_partitions = num_partitions;
162 self
163 }
164
165 pub fn batches_per_partition(mut self, batches_per_partition: usize) -> Self {
173 self.batches_per_partition = batches_per_partition;
174 self
175 }
176
177 pub fn rows_per_batch(mut self, rows_per_batch: usize) -> Self {
185 self.rows_per_batch = rows_per_batch;
186 self
187 }
188
189 pub fn geometry_type(mut self, geom_type: GeometryTypeId) -> Self {
200 self.options.geom_type = geom_type;
201 self
202 }
203
204 pub fn sedona_type(mut self, sedona_type: SedonaType) -> Self {
212 self.sedona_type = sedona_type;
213 self
214 }
215
216 pub fn bounds(mut self, bounds: Rect) -> Self {
225 self.options.bounds = bounds;
226 self
227 }
228
229 pub fn size_range(mut self, size_range: (f64, f64)) -> Self {
238 self.options.size_range = size_range;
239 self
240 }
241
242 pub fn null_rate(mut self, null_rate: f64) -> Self {
248 self.null_rate = null_rate;
249 self
250 }
251
252 pub fn empty_rate(mut self, empty_rate: f64) -> Self {
258 self.options.empty_rate = empty_rate;
259 self
260 }
261
262 pub fn vertices_per_linestring_range(
271 mut self,
272 vertices_per_linestring_range: (usize, usize),
273 ) -> Self {
274 self.options.vertices_per_linestring_range = vertices_per_linestring_range;
275 self
276 }
277
278 pub fn num_parts_range(mut self, num_parts_range: (usize, usize)) -> Self {
285 self.options.num_parts_range = num_parts_range;
286 self
287 }
288
289 pub fn polygon_hole_rate(mut self, polygon_hole_rate: f64) -> Self {
296 self.options.polygon_hole_rate = polygon_hole_rate;
297 self
298 }
299
300 pub fn schema(&self) -> SchemaRef {
308 Arc::new(Schema::new(vec![
310 Field::new("id", DataType::Int32, false),
311 Field::new("dist", DataType::Float64, false),
312 self.sedona_type.to_storage_field("geometry", true).unwrap(),
313 ]))
314 }
315
316 pub fn build(&self) -> Result<(SchemaRef, Vec<Vec<RecordBatch>>)> {
337 let schema = self.schema();
339 let mut result = Vec::with_capacity(self.num_partitions);
340
341 for partition_idx in 0..self.num_partitions {
342 let rng = Self::default_rng(self.seed + partition_idx as u64);
343 let partition_batches = self
344 .partition_reader(rng, partition_idx)
345 .collect::<Result<Vec<_>, ArrowError>>()?;
346 result.push(partition_batches);
347 }
348
349 Ok((schema, result))
350 }
351
352 pub fn default_rng(seed: u64) -> impl Rng {
356 StdRng::seed_from_u64(seed)
357 }
358
359 pub fn partition_reader<R: Rng + Send + 'static>(
361 &self,
362 rng: R,
363 partition_idx: usize,
364 ) -> Box<dyn RecordBatchReader + Send> {
365 let reader = RandomPartitionedDataReader {
366 builder: self.clone(),
367 schema: self.schema(),
368 partition_idx,
369 batch_idx: 0,
370 rng,
371 };
372
373 Box::new(reader)
374 }
375
376 fn generate_batch<R: Rng>(
378 &self,
379 rng: &mut R,
380 schema: &SchemaRef,
381 partition_idx: usize,
382 batch_idx: usize,
383 ) -> Result<RecordBatch> {
384 let id_start =
386 (partition_idx * self.batches_per_partition + batch_idx) * self.rows_per_batch;
387 let ids: Vec<i32> = (0..self.rows_per_batch)
388 .map(|i| (id_start + i) as i32)
389 .collect();
390
391 let distance_dist = Uniform::new(0.0, 100.0);
393 let distances: Vec<f64> = (0..self.rows_per_batch)
394 .map(|_| rng.sample(distance_dist))
395 .collect();
396
397 let wkb_geometries: Vec<Option<Vec<u8>>> = (0..self.rows_per_batch)
399 .map(|_| {
400 if rng.sample(Uniform::new(0.0, 1.0)) < self.null_rate {
401 None
402 } else {
403 Some(generate_random_wkb(rng, &self.options))
404 }
405 })
406 .collect();
407
408 let id_array = Arc::new(Int32Array::from(ids));
410 let dist_array = Arc::new(Float64Array::from(distances));
411 let geometry_array = create_wkb_array(wkb_geometries, &self.sedona_type)?;
412
413 Ok(RecordBatch::try_new(
415 schema.clone(),
416 vec![id_array, dist_array, geometry_array],
417 )?)
418 }
419}
420
421fn create_wkb_array(
423 wkb_values: Vec<Option<Vec<u8>>>,
424 sedona_type: &SedonaType,
425) -> Result<ArrayRef> {
426 match sedona_type {
427 SedonaType::Wkb(_, _) => Ok(Arc::new(BinaryArray::from_iter(wkb_values))),
428 SedonaType::WkbView(_, _) => Ok(Arc::new(BinaryViewArray::from_iter(wkb_values))),
429 _ => sedona_internal_err!("create_wkb_array not implemented for {sedona_type:?}"),
430 }
431}
432
433struct RandomPartitionedDataReader<R> {
434 builder: RandomPartitionedDataBuilder,
435 schema: SchemaRef,
436 partition_idx: usize,
437 batch_idx: usize,
438 rng: R,
439}
440
441impl<R: Rng> RecordBatchReader for RandomPartitionedDataReader<R> {
442 fn schema(&self) -> SchemaRef {
443 self.builder.schema()
444 }
445}
446
447impl<R: Rng> Iterator for RandomPartitionedDataReader<R> {
448 type Item = std::result::Result<RecordBatch, ArrowError>;
449
450 fn next(&mut self) -> Option<Self::Item> {
451 if self.batch_idx == self.builder.batches_per_partition {
452 return None;
453 }
454
455 let maybe_batch = self
456 .builder
457 .generate_batch(
458 &mut self.rng,
459 &self.schema,
460 self.partition_idx,
461 self.batch_idx,
462 )
463 .map_err(|e| ArrowError::ExternalError(Box::new(e)));
464 self.batch_idx += 1;
465 Some(maybe_batch)
466 }
467}
468
469#[derive(Debug, Clone)]
471struct RandomGeometryOptions {
472 geom_type: GeometryTypeId,
473 bounds: Rect,
474 size_range: (f64, f64),
475 vertices_per_linestring_range: (usize, usize),
476 empty_rate: f64,
477 polygon_hole_rate: f64,
478 num_parts_range: (usize, usize),
479}
480
481impl RandomGeometryOptions {
482 fn new() -> Self {
483 Self {
484 geom_type: GeometryTypeId::Point,
485 empty_rate: 0.0,
486 bounds: Rect::new(Coord { x: 0.0, y: 0.0 }, Coord { x: 100.0, y: 100.0 }),
487 size_range: (1.0, 10.0),
488 vertices_per_linestring_range: (4, 4),
489 polygon_hole_rate: 0.0,
490 num_parts_range: (1, 3),
491 }
492 }
493}
494
495impl Default for RandomGeometryOptions {
496 fn default() -> Self {
497 Self::new()
498 }
499}
500
501fn generate_random_wkb<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOptions) -> Vec<u8> {
503 let geometry = generate_random_geometry(rng, options);
504
505 let mut out: Vec<u8> = vec![];
507 wkb::writer::write_geometry(
508 &mut out,
509 &geometry,
510 &WriteOptions {
511 endianness: Endianness::LittleEndian,
512 },
513 )
514 .unwrap();
515 out
516}
517
518fn generate_random_geometry<R: rand::Rng>(
519 rng: &mut R,
520 options: &RandomGeometryOptions,
521) -> Geometry {
522 match options.geom_type {
523 GeometryTypeId::Point => Geometry::Point(generate_random_point(rng, options)),
524 GeometryTypeId::LineString => {
525 Geometry::LineString(generate_random_linestring(rng, options))
526 }
527 GeometryTypeId::Polygon => Geometry::Polygon(generate_random_polygon(rng, options)),
528 GeometryTypeId::MultiPoint => {
529 Geometry::MultiPoint(generate_random_multipoint(rng, options))
530 }
531 GeometryTypeId::MultiLineString => {
532 Geometry::MultiLineString(generate_random_multilinestring(rng, options))
533 }
534 GeometryTypeId::MultiPolygon => {
535 Geometry::MultiPolygon(generate_random_multipolygon(rng, options))
536 }
537 GeometryTypeId::GeometryCollection => {
538 Geometry::GeometryCollection(generate_random_geometrycollection(rng, options))
539 }
540 GeometryTypeId::Geometry => {
541 let mut copy_options = options.clone();
542 copy_options.geom_type = pick_random_geometry_type(rng);
543 generate_random_geometry(rng, ©_options)
544 }
545 }
546}
547
548fn generate_random_point<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOptions) -> Point {
549 if rng.gen_bool(options.empty_rate) {
550 Point::new(f64::NAN, f64::NAN)
554 } else {
555 let x_dist = Uniform::new(options.bounds.min().x, options.bounds.max().x);
557 let y_dist = Uniform::new(options.bounds.min().y, options.bounds.max().y);
558 let x = rng.sample(x_dist);
559 let y = rng.sample(y_dist);
560 Point::new(x, y)
561 }
562}
563
564fn generate_random_linestring<R: rand::Rng>(
565 rng: &mut R,
566 options: &RandomGeometryOptions,
567) -> LineString {
568 if rng.gen_bool(options.empty_rate) {
569 LineString::new(vec![])
570 } else {
571 let (center_x, center_y, half_size) = generate_random_circle(rng, options);
572 let vertices_dist = Uniform::new_inclusive(
573 options.vertices_per_linestring_range.0,
574 options.vertices_per_linestring_range.1,
575 );
576 let num_vertices = rng.sample(vertices_dist).max(2);
578 let coords =
579 generate_circular_vertices(rng, center_x, center_y, half_size, num_vertices, false);
580 LineString::from(coords)
581 }
582}
583
584fn generate_random_polygon<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOptions) -> Polygon {
585 if rng.gen_bool(options.empty_rate) {
586 Polygon::new(LineString::new(vec![]), vec![])
587 } else {
588 let (center_x, center_y, half_size) = generate_random_circle(rng, options);
589 let vertices_dist = Uniform::new_inclusive(
590 options.vertices_per_linestring_range.0,
591 options.vertices_per_linestring_range.1,
592 );
593 let num_vertices = rng.sample(vertices_dist).max(3);
595 let coords =
596 generate_circular_vertices(rng, center_x, center_y, half_size, num_vertices, true);
597 let shell = LineString::from(coords);
598 let mut holes = Vec::new();
599
600 let add_hole = rng.gen_bool(options.polygon_hole_rate);
602 let hole_scale_factor_dist = Uniform::new(0.1, 0.5);
603 let hole_scale_factor = rng.sample(hole_scale_factor_dist);
604 if add_hole {
605 let new_size = half_size * hole_scale_factor;
606 let mut coords =
607 generate_circular_vertices(rng, center_x, center_y, new_size, num_vertices, true);
608 coords.reverse();
609 holes.push(LineString::from(coords));
610 }
611
612 Polygon::new(shell, holes)
613 }
614}
615
616fn generate_random_multipoint<R: rand::Rng>(
617 rng: &mut R,
618 options: &RandomGeometryOptions,
619) -> MultiPoint {
620 if rng.gen_bool(options.empty_rate) {
621 MultiPoint::new(vec![])
622 } else {
623 let children = generate_random_children(rng, options, generate_random_point);
624 MultiPoint::new(children)
625 }
626}
627
628fn generate_random_multilinestring<R: rand::Rng>(
629 rng: &mut R,
630 options: &RandomGeometryOptions,
631) -> MultiLineString {
632 if rng.gen_bool(options.empty_rate) {
633 MultiLineString::new(vec![])
634 } else {
635 let children = generate_random_children(rng, options, generate_random_linestring);
636 MultiLineString::new(children)
637 }
638}
639
640fn generate_random_multipolygon<R: rand::Rng>(
641 rng: &mut R,
642 options: &RandomGeometryOptions,
643) -> MultiPolygon {
644 if rng.gen_bool(options.empty_rate) {
645 MultiPolygon::new(vec![])
646 } else {
647 let children = generate_random_children(rng, options, generate_random_polygon);
648 MultiPolygon::new(children)
649 }
650}
651
652fn generate_random_geometrycollection<R: rand::Rng>(
653 rng: &mut R,
654 options: &RandomGeometryOptions,
655) -> GeometryCollection {
656 if rng.gen_bool(options.empty_rate) {
657 GeometryCollection::new_from(vec![])
658 } else {
659 let children = generate_random_children(rng, options, generate_random_geometry);
660 GeometryCollection::new_from(children)
661 }
662}
663
664fn generate_random_children<R: Rng, T, F: Fn(&mut R, &RandomGeometryOptions) -> T>(
665 rng: &mut R,
666 options: &RandomGeometryOptions,
667 func: F,
668) -> Vec<T> {
669 let num_parts_dist =
670 Uniform::new_inclusive(options.num_parts_range.0, options.num_parts_range.1);
671 let num_parts = rng.sample(num_parts_dist);
672
673 let (center_x, center_y, half_width) = generate_random_circle(rng, options);
675 let feature_bounds = Rect::new(
676 Coord {
677 x: center_x - half_width,
678 y: center_y - half_width,
679 },
680 Coord {
681 x: center_x + half_width,
682 y: center_y + half_width,
683 },
684 );
685
686 let child_bounds = generate_non_overlapping_sub_rectangles(num_parts, &feature_bounds);
687 let mut child_options = options.clone();
688 child_options.empty_rate = 0.0;
689
690 let mut children = Vec::new();
691 for bounds in child_bounds {
692 child_options.bounds = bounds;
693 let child_size = bounds.height().min(bounds.width());
694 child_options.size_range = (child_size * 0.9, child_size);
695
696 if options.geom_type == GeometryTypeId::GeometryCollection {
699 child_options.geom_type = pick_random_geometry_type(rng);
700 }
701 children.push(func(rng, &child_options));
702 }
703
704 children
705}
706
707fn pick_random_geometry_type<R: Rng>(rng: &mut R) -> GeometryTypeId {
708 [
709 GeometryTypeId::Point,
710 GeometryTypeId::LineString,
711 GeometryTypeId::Polygon,
712 GeometryTypeId::MultiPoint,
713 GeometryTypeId::MultiLineString,
714 GeometryTypeId::MultiPolygon,
715 ][rng.gen_range(0..6)]
716}
717
718fn generate_random_circle<R: rand::Rng>(
719 rng: &mut R,
720 options: &RandomGeometryOptions,
721) -> (f64, f64, f64) {
722 let size_dist = Uniform::new(options.size_range.0, options.size_range.1);
724 let half_size = rng.sample(size_dist) / 2.0;
725
726 let center_x_dist = Uniform::new(
728 options.bounds.min().x + half_size,
729 options.bounds.max().x - half_size,
730 );
731 let center_y_dist = Uniform::new(
732 options.bounds.min().y + half_size,
733 options.bounds.max().y - half_size,
734 );
735 let center_x = rng.sample(center_x_dist);
736 let center_y = rng.sample(center_y_dist);
737
738 (center_x, center_y, half_size)
739}
740
741fn generate_non_overlapping_sub_rectangles(num_parts: usize, bounds: &Rect) -> Vec<Rect> {
742 let mut tiles = vec![*bounds];
743 let mut n = 0;
744 while tiles.len() < num_parts {
745 let (largest_idx, _) = tiles
747 .iter()
748 .enumerate()
749 .map(|(i, rect)| (i, rect.height() * rect.width()))
750 .max_by(|(_, a1), (_, a2)| a1.partial_cmp(a2).unwrap())
751 .unwrap_or((0, 0.0));
752
753 let new_rects = if (n % 2) == 0 {
755 tiles[largest_idx].split_x()
756 } else {
757 tiles[largest_idx].split_y()
758 };
759
760 tiles.remove(largest_idx);
762 tiles.insert(largest_idx, new_rects[0]);
763 tiles.insert(largest_idx, new_rects[1]);
764 n += 1;
765 }
766
767 tiles
768}
769
770fn generate_circular_vertices<R: rand::Rng>(
771 rng: &mut R,
772 center_x: f64,
773 center_y: f64,
774 radius: f64,
775 num_vertices: usize,
776 closed: bool,
777) -> Vec<Coord> {
778 let mut out = Vec::new();
779
780 let start_angle_dist = Uniform::new(0.0, 2.0 * PI);
782 let mut angle: f64 = rng.sample(start_angle_dist);
783
784 let dangle = 2.0 * PI / (num_vertices as f64).max(3.0);
785 for _ in 0..num_vertices {
786 out.push(Coord {
787 x: angle.cos() * radius + center_x,
788 y: angle.sin() * radius + center_y,
789 });
790 angle += dangle;
791 }
792
793 if closed {
794 out.push(out[0]);
795 }
796
797 out
798}
799
800#[cfg(test)]
801mod tests {
802 use super::*;
803 use arrow_schema::DataType;
804 use geo_traits::{MultiLineStringTrait, MultiPolygonTrait};
805 use geo_types::Coord;
806 use rand::rngs::StdRng;
807 use rand::SeedableRng;
808 use rstest::rstest;
809 use sedona_geometry::{
810 analyze::analyze_geometry, bounds::wkb_bounds_xy, interval::IntervalTrait,
811 };
812
813 #[test]
814 fn test_generate_random_geometry_produces_valid_wkb() {
815 let bounds = Rect::new(Coord { x: 10.0, y: 10.0 }, Coord { x: 90.0, y: 90.0 });
816 let size_range = (1.0, 10.0);
817
818 let test_cases = vec![
820 (GeometryTypeId::Point, 42, 100, 20, 50), (GeometryTypeId::Polygon, 123, 50, 80, 200),
822 ];
823
824 for (geom_type, seed, iterations, min_size, max_size) in test_cases {
825 let mut rng = StdRng::seed_from_u64(seed);
826 let options = RandomGeometryOptions {
827 geom_type,
828 bounds,
829 size_range,
830 ..Default::default()
831 };
832
833 for _ in 0..iterations {
834 let wkb_bytes = generate_random_wkb(&mut rng, &options);
835
836 assert!(!wkb_bytes.is_empty());
838 assert!(
839 wkb_bytes.len() >= min_size,
840 "WKB size {} is smaller than expected minimum {} for {:?}",
841 wkb_bytes.len(),
842 min_size,
843 geom_type
844 );
845 assert!(
846 wkb_bytes.len() <= max_size,
847 "WKB size {} is larger than expected maximum {} for {:?}",
848 wkb_bytes.len(),
849 max_size,
850 geom_type
851 );
852
853 wkb::reader::read_wkb(&wkb_bytes).unwrap();
855 }
856 }
857 }
858
859 #[test]
860 fn test_generate_random_geometry_deterministic() {
861 let bounds = Rect::new(Coord { x: 0.0, y: 0.0 }, Coord { x: 100.0, y: 100.0 });
862 let size_range = (1.0, 10.0);
863
864 let geom_types = [GeometryTypeId::Point, GeometryTypeId::Polygon];
865
866 let mut rng1 = StdRng::seed_from_u64(42);
868 let mut rng2 = StdRng::seed_from_u64(42);
869
870 for geom_type in geom_types {
871 let options = RandomGeometryOptions {
872 geom_type,
873 bounds,
874 size_range,
875 ..Default::default()
876 };
877 let wkb1 = generate_random_wkb(&mut rng1, &options);
878 let wkb2 = generate_random_wkb(&mut rng2, &options);
879
880 assert_eq!(wkb1, wkb2);
882 }
883 }
884
885 #[test]
886 fn test_random_partitioned_data_builder_build_basic() {
887 let (schema, partitions) = RandomPartitionedDataBuilder::new()
888 .num_partitions(2)
889 .batches_per_partition(3)
890 .rows_per_batch(4)
891 .null_rate(0.0) .build()
893 .unwrap();
894
895 assert_eq!(schema.fields().len(), 3);
897 assert_eq!(schema.field(0).name(), "id");
898 assert_eq!(schema.field(0).data_type(), &DataType::Int32);
899 assert_eq!(schema.field(1).name(), "dist");
900 assert_eq!(schema.field(1).data_type(), &DataType::Float64);
901 assert_eq!(schema.field(2).name(), "geometry");
902
903 assert_eq!(partitions.len(), 2); for partition in &partitions {
907 assert_eq!(partition.len(), 3); for batch in partition {
910 assert_eq!(batch.num_rows(), 4); assert_eq!(batch.num_columns(), 3);
912 }
913 }
914 }
915
916 #[test]
917 fn test_random_partitioned_data_builder_unique_ids() {
918 let (_, partitions) = RandomPartitionedDataBuilder::new()
919 .num_partitions(2)
920 .batches_per_partition(2)
921 .rows_per_batch(3)
922 .build()
923 .unwrap();
924
925 let mut all_ids = Vec::new();
926
927 for partition in &partitions {
928 for batch in partition {
929 let id_array = batch
930 .column(0)
931 .as_any()
932 .downcast_ref::<Int32Array>()
933 .unwrap();
934 for i in 0..id_array.len() {
935 all_ids.push(id_array.value(i));
936 }
937 }
938 }
939
940 all_ids.sort();
942 for i in 1..all_ids.len() {
943 assert_ne!(
944 all_ids[i - 1],
945 all_ids[i],
946 "Found duplicate ID: {}",
947 all_ids[i]
948 );
949 }
950
951 for (i, &id) in all_ids.iter().enumerate() {
953 assert_eq!(id, i as i32);
954 }
955 }
956
957 #[test]
958 fn test_random_partitioned_data_builder_null_rate() {
959 let (_, partitions) = RandomPartitionedDataBuilder::new()
960 .rows_per_batch(100)
961 .null_rate(0.5) .build()
963 .unwrap();
964
965 let batch = &partitions[0][0];
966 let geometry_array = batch.column(2);
967
968 let null_count = geometry_array.null_count();
969 let total_count = geometry_array.len();
970 let null_rate = null_count as f64 / total_count as f64;
971
972 assert!(
974 (0.3..=0.7).contains(&null_rate),
975 "Expected null rate around 0.5, got {null_rate}"
976 );
977 }
978
979 #[test]
980 fn test_random_partitioned_data_builder_deterministic() {
981 let bounds = Rect::new(Coord { x: 0.0, y: 0.0 }, Coord { x: 100.0, y: 100.0 });
982
983 let (schema1, partitions1) = RandomPartitionedDataBuilder::new()
984 .seed(999)
985 .num_partitions(2)
986 .batches_per_partition(2)
987 .rows_per_batch(5)
988 .bounds(bounds)
989 .build()
990 .unwrap();
991
992 let (schema2, partitions2) = RandomPartitionedDataBuilder::new()
993 .seed(999) .num_partitions(2)
995 .batches_per_partition(2)
996 .rows_per_batch(5)
997 .bounds(bounds)
998 .build()
999 .unwrap();
1000
1001 assert_eq!(schema1, schema2);
1003
1004 assert_eq!(partitions1.len(), partitions2.len());
1006 for (partition1, partition2) in partitions1.iter().zip(partitions2.iter()) {
1007 assert_eq!(partition1.len(), partition2.len());
1008 for (batch1, batch2) in partition1.iter().zip(partition2.iter()) {
1009 let ids1 = batch1
1011 .column(0)
1012 .as_any()
1013 .downcast_ref::<Int32Array>()
1014 .unwrap();
1015 let ids2 = batch2
1016 .column(0)
1017 .as_any()
1018 .downcast_ref::<Int32Array>()
1019 .unwrap();
1020 assert_eq!(ids1, ids2);
1021
1022 let dists1 = batch1
1024 .column(1)
1025 .as_any()
1026 .downcast_ref::<Float64Array>()
1027 .unwrap();
1028 let dists2 = batch2
1029 .column(1)
1030 .as_any()
1031 .downcast_ref::<Float64Array>()
1032 .unwrap();
1033 assert_eq!(dists1, dists2);
1034 }
1035 }
1036 }
1037
1038 #[test]
1039 fn test_random_partitioned_data_builder_different_seeds() {
1040 let bounds = Rect::new(Coord { x: 0.0, y: 0.0 }, Coord { x: 100.0, y: 100.0 });
1041
1042 let (_, partitions1) = RandomPartitionedDataBuilder::new()
1043 .seed(111)
1044 .rows_per_batch(10)
1045 .bounds(bounds)
1046 .build()
1047 .unwrap();
1048
1049 let (_, partitions2) = RandomPartitionedDataBuilder::new()
1050 .seed(222) .rows_per_batch(10)
1052 .bounds(bounds)
1053 .build()
1054 .unwrap();
1055
1056 let dists1 = partitions1[0][0]
1058 .column(1)
1059 .as_any()
1060 .downcast_ref::<Float64Array>()
1061 .unwrap();
1062 let dists2 = partitions2[0][0]
1063 .column(1)
1064 .as_any()
1065 .downcast_ref::<Float64Array>()
1066 .unwrap();
1067
1068 let mut found_difference = false;
1070 for i in 0..dists1.len() {
1071 if (dists1.value(i) - dists2.value(i)).abs() > f64::EPSILON {
1072 found_difference = true;
1073 break;
1074 }
1075 }
1076 assert!(
1077 found_difference,
1078 "Expected different random data with different seeds"
1079 );
1080 }
1081
1082 #[test]
1083 fn test_random_linestring_num_vertices() {
1084 let mut rng = StdRng::seed_from_u64(123);
1085 let mut options = RandomGeometryOptions::new();
1086 options.vertices_per_linestring_range = (3, 3);
1087 for _ in 0..100 {
1088 let geom = generate_random_linestring(&mut rng, &options);
1089 assert_eq!(geom.coords().count(), 3);
1090 }
1091
1092 options.vertices_per_linestring_range = (50, 50);
1093 for _ in 0..100 {
1094 let geom = generate_random_linestring(&mut rng, &options);
1095 assert_eq!(geom.coords().count(), 50);
1096 }
1097 }
1098
1099 #[test]
1100 fn test_random_polygon_has_hole() {
1101 let mut rng = StdRng::seed_from_u64(123);
1102 let mut options = RandomGeometryOptions::new();
1103
1104 options.polygon_hole_rate = 0.0;
1105 for _ in 0..100 {
1106 let geom = generate_random_polygon(&mut rng, &options);
1107 assert_eq!(geom.interiors().len(), 0);
1108 }
1109
1110 options.polygon_hole_rate = 1.0;
1111 for _ in 0..100 {
1112 let geom = generate_random_polygon(&mut rng, &options);
1113 assert!(!geom.interiors().is_empty());
1114 }
1115 }
1116
1117 #[test]
1118 fn test_random_multipoint_part_count() {
1119 let mut rng = StdRng::seed_from_u64(123);
1120 let mut options = RandomGeometryOptions::new();
1121
1122 options.num_parts_range = (3, 3);
1123 for _ in 0..100 {
1124 let geom = generate_random_multipoint(&mut rng, &options);
1125 assert_eq!(geom.len(), 3);
1126 }
1127
1128 options.num_parts_range = (10, 10);
1129 for _ in 0..100 {
1130 let geom = generate_random_multipoint(&mut rng, &options);
1131 assert_eq!(geom.len(), 10);
1132 }
1133 }
1134
1135 #[test]
1136 fn test_random_multilinestring_part_count() {
1137 let mut rng = StdRng::seed_from_u64(123);
1138 let mut options = RandomGeometryOptions::new();
1139
1140 options.num_parts_range = (3, 3);
1141 for _ in 0..100 {
1142 let geom = generate_random_multilinestring(&mut rng, &options);
1143 assert_eq!(geom.num_line_strings(), 3);
1144 }
1145
1146 options.num_parts_range = (10, 10);
1147 for _ in 0..100 {
1148 let geom = generate_random_multilinestring(&mut rng, &options);
1149 assert_eq!(geom.num_line_strings(), 10);
1150 }
1151 }
1152
1153 #[test]
1154 fn test_random_multipolygon_part_count() {
1155 let mut rng = StdRng::seed_from_u64(123);
1156 let mut options = RandomGeometryOptions::new();
1157
1158 options.num_parts_range = (3, 3);
1159 for _ in 0..100 {
1160 let geom = generate_random_multipolygon(&mut rng, &options);
1161 assert_eq!(geom.num_polygons(), 3);
1162 }
1163
1164 options.num_parts_range = (10, 10);
1165 for _ in 0..100 {
1166 let geom = generate_random_multipolygon(&mut rng, &options);
1167 assert_eq!(geom.num_polygons(), 10);
1168 }
1169 }
1170
1171 #[test]
1172 fn test_random_geometrycollection_part_count() {
1173 let mut rng = StdRng::seed_from_u64(123);
1174 let mut options = RandomGeometryOptions::new();
1175
1176 options.num_parts_range = (3, 3);
1177 for _ in 0..100 {
1178 let geom = generate_random_geometrycollection(&mut rng, &options);
1179 assert_eq!(geom.len(), 3);
1180 }
1181
1182 options.num_parts_range = (10, 10);
1183 for _ in 0..100 {
1184 let geom = generate_random_geometrycollection(&mut rng, &options);
1185 assert_eq!(geom.len(), 10);
1186 }
1187 }
1188
1189 #[rstest]
1190 fn test_random_geometry_type(
1191 #[values(
1192 GeometryTypeId::Point,
1193 GeometryTypeId::LineString,
1194 GeometryTypeId::Polygon,
1195 GeometryTypeId::MultiPoint,
1196 GeometryTypeId::MultiLineString,
1197 GeometryTypeId::MultiPolygon,
1198 GeometryTypeId::GeometryCollection
1199 )]
1200 geom_type: GeometryTypeId,
1201 ) {
1202 let mut rng = StdRng::seed_from_u64(123);
1203 let mut options = RandomGeometryOptions::new();
1204 options.geom_type = geom_type;
1205
1206 options.empty_rate = 0.0;
1207 for _ in 0..100 {
1208 let geom = generate_random_wkb(&mut rng, &options);
1209 let wkb = wkb::reader::read_wkb(&geom).unwrap();
1210 let analysis = analyze_geometry(&wkb).unwrap();
1211 assert_eq!(analysis.geometry_type.geometry_type(), geom_type);
1212 }
1213 }
1214
1215 #[rstest]
1216 fn test_random_emptiness(
1217 #[values(
1218 GeometryTypeId::Point,
1219 GeometryTypeId::LineString,
1220 GeometryTypeId::Polygon,
1221 GeometryTypeId::MultiPoint,
1222 GeometryTypeId::MultiLineString,
1223 GeometryTypeId::MultiPolygon,
1224 GeometryTypeId::GeometryCollection
1225 )]
1226 geom_type: GeometryTypeId,
1227 ) {
1228 let mut rng = StdRng::seed_from_u64(123);
1229 let mut options = RandomGeometryOptions::new();
1230 options.geom_type = geom_type;
1231
1232 options.empty_rate = 0.0;
1233 for _ in 0..100 {
1234 let geom = generate_random_wkb(&mut rng, &options);
1235 let bounds = wkb_bounds_xy(&geom).unwrap();
1236 assert!(!bounds.x().is_empty());
1237 assert!(!bounds.y().is_empty());
1238
1239 assert!(
1240 bounds.x().lo() >= options.bounds.min().x
1241 && bounds.y().lo() >= options.bounds.min().y
1242 && bounds.x().hi() <= options.bounds.max().x
1243 && bounds.y().hi() <= options.bounds.max().y
1244 );
1245 }
1246
1247 options.empty_rate = 1.0;
1248 for _ in 0..100 {
1249 let geom = generate_random_wkb(&mut rng, &options);
1250 let bounds = wkb_bounds_xy(&geom).unwrap();
1251 assert!(bounds.x().is_empty());
1252 assert!(bounds.y().is_empty());
1253 }
1254 }
1255}