Skip to main content

sedona_spatial_join/
partitioning.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use datafusion_common::Result;
19use sedona_geometry::bounding_box::BoundingBox;
20
21pub mod broadcast;
22pub mod flat;
23pub mod kdb;
24pub(crate) mod partition_slots;
25pub mod round_robin;
26pub mod rtree;
27pub mod stream_repartitioner;
28pub(crate) mod util;
29
30/// Spatial partitioning is different from traditional data partitioning such as hash partitioning.
31/// There is no perfect spatial partitioner that can partition spatial objects with extents (linestrings,
32/// polygons, etc.) into disjoint partitions without overlaps. Therefore, a spatial partitioner
33/// usually defines a set of spatial partitions (e.g., grid cells), and assigns each spatial object
34/// to one or more partitions based on its spatial extent. The spatial partitioner for our
35/// out-of-core spatial join follows a similar, but a bit different approach:
36///
37/// 1. It defines a fixed number of regular spatial partitions (e.g., grid cells), just like
38///    traditional spatial partitioners.
39/// 2. It defines a `None` partition for spatial objects that does not intersect with any of the
40///    partitioning grids.
41/// 3. It defines a `Multi` partition for spatial objects that intersect with multiple regular partitions.
42///
43/// The partitioning result can be one of the following:
44/// - Assigned to one of the regular partitions (if it intersects with exactly one regular partition).
45/// - Assigned to the `None` partition (if it does not intersect with any regular partition).
46/// - Assigned to the `Multi` partition (if it intersects with multiple regular partitions).
47///
48/// This spatial partitioning scheme assigns one and only one partition to each spatial object,
49/// which simplifies the partitioning logic for out-of-core spatial join. The partitioner will be designed
50/// to produce only `Regular` partitions for indexed objects, and may produce `None` and `Multi` partitions
51/// for probe objects, depending on their spatial extents.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
53pub enum SpatialPartition {
54    Regular(u32),
55    None,
56    Multi,
57}
58
59/// Partitioning larger-than-memory indexed side to support out-of-core spatial join.
60pub trait SpatialPartitioner: Send {
61    /// Get the total number of spatial partitions, excluding the None partition and Multi partition.
62    fn num_regular_partitions(&self) -> usize;
63
64    /// Given a bounding box, return the partition it is assigned to.
65    fn partition(&self, bbox: &BoundingBox) -> Result<SpatialPartition>;
66
67    /// Given a bounding box, return the partition it is assigned to. This function never returns
68    /// Multi partition. If `bbox` intersects with multiple partitions, only one of them will be
69    /// selected as regular partition.
70    fn partition_no_multi(&self, bbox: &BoundingBox) -> Result<SpatialPartition>;
71
72    /// Clone the partitioner as a boxed trait object.
73    fn box_clone(&self) -> Box<dyn SpatialPartitioner>;
74}
75
76/// Indicates for which side of the spatial join the partitioning is being performed.
77/// Different methods should be used for the build side and probe side.
78#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
79pub enum PartitionedSide {
80    /// Invoke [`SpatialPartitioner::partition_no_multi`] for partitioning
81    BuildSide,
82    /// Invoke [`SpatialPartitioner::partition`] for partitioning
83    ProbeSide,
84}