sedona_spatial_join/partitioning.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use datafusion_common::Result;
19use sedona_geometry::bounding_box::BoundingBox;
20
21pub mod broadcast;
22pub mod flat;
23pub mod kdb;
24pub(crate) mod partition_slots;
25pub mod round_robin;
26pub mod rtree;
27pub mod stream_repartitioner;
28pub(crate) mod util;
29
30/// Spatial partitioning is different from traditional data partitioning such as hash partitioning.
31/// There is no perfect spatial partitioner that can partition spatial objects with extents (linestrings,
32/// polygons, etc.) into disjoint partitions without overlaps. Therefore, a spatial partitioner
33/// usually defines a set of spatial partitions (e.g., grid cells), and assigns each spatial object
34/// to one or more partitions based on its spatial extent. The spatial partitioner for our
35/// out-of-core spatial join follows a similar, but a bit different approach:
36///
37/// 1. It defines a fixed number of regular spatial partitions (e.g., grid cells), just like
38/// traditional spatial partitioners.
39/// 2. It defines a `None` partition for spatial objects that does not intersect with any of the
40/// partitioning grids.
41/// 3. It defines a `Multi` partition for spatial objects that intersect with multiple regular partitions.
42///
43/// The partitioning result can be one of the following:
44/// - Assigned to one of the regular partitions (if it intersects with exactly one regular partition).
45/// - Assigned to the `None` partition (if it does not intersect with any regular partition).
46/// - Assigned to the `Multi` partition (if it intersects with multiple regular partitions).
47///
48/// This spatial partitioning scheme assigns one and only one partition to each spatial object,
49/// which simplifies the partitioning logic for out-of-core spatial join. The partitioner will be designed
50/// to produce only `Regular` partitions for indexed objects, and may produce `None` and `Multi` partitions
51/// for probe objects, depending on their spatial extents.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
53pub enum SpatialPartition {
54 Regular(u32),
55 None,
56 Multi,
57}
58
59/// Partitioning larger-than-memory indexed side to support out-of-core spatial join.
60pub trait SpatialPartitioner: Send {
61 /// Get the total number of spatial partitions, excluding the None partition and Multi partition.
62 fn num_regular_partitions(&self) -> usize;
63
64 /// Given a bounding box, return the partition it is assigned to.
65 fn partition(&self, bbox: &BoundingBox) -> Result<SpatialPartition>;
66
67 /// Given a bounding box, return the partition it is assigned to. This function never returns
68 /// Multi partition. If `bbox` intersects with multiple partitions, only one of them will be
69 /// selected as regular partition.
70 fn partition_no_multi(&self, bbox: &BoundingBox) -> Result<SpatialPartition>;
71
72 /// Clone the partitioner as a boxed trait object.
73 fn box_clone(&self) -> Box<dyn SpatialPartitioner>;
74}
75
76/// Indicates for which side of the spatial join the partitioning is being performed.
77/// Different methods should be used for the build side and probe side.
78#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
79pub enum PartitionedSide {
80 /// Invoke [`SpatialPartitioner::partition_no_multi`] for partitioning
81 BuildSide,
82 /// Invoke [`SpatialPartitioner::partition`] for partitioning
83 ProbeSide,
84}