datafusion_expr_common/sort_properties.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::ops::Neg;
19
20use crate::interval_arithmetic::Interval;
21
22use arrow::compute::SortOptions;
23use arrow::datatypes::DataType;
24
25/// To propagate [`SortOptions`] across the `PhysicalExpr`, it is insufficient
26/// to simply use `Option<SortOptions>`: There must be a differentiation between
27/// unordered columns and literal values, since literals may not break the ordering
28/// when they are used as a child of some binary expression when the other child has
29/// some ordering. On the other hand, unordered columns cannot maintain ordering when
30/// they take part in such operations.
31///
32/// Example: ((a_ordered + b_unordered) + c_ordered) expression cannot end up with
33/// sorted data; however the ((a_ordered + 999) + c_ordered) expression can. Therefore,
34/// we need two different variants for literals and unordered columns as literals are
35/// often more ordering-friendly under most mathematical operations.
36#[derive(PartialEq, Debug, Clone, Copy, Default)]
37pub enum SortProperties {
38 /// Use the ordinary [`SortOptions`] struct to represent ordered data:
39 Ordered(SortOptions),
40 // This alternative represents unordered data:
41 #[default]
42 Unordered,
43 // Singleton is used for single-valued literal numbers:
44 Singleton,
45}
46
47impl SortProperties {
48 pub fn add(&self, rhs: &Self) -> Self {
49 match (self, rhs) {
50 (Self::Singleton, _) => *rhs,
51 (_, Self::Singleton) => *self,
52 (Self::Ordered(lhs), Self::Ordered(rhs))
53 if lhs.descending == rhs.descending =>
54 {
55 Self::Ordered(SortOptions {
56 descending: lhs.descending,
57 nulls_first: lhs.nulls_first || rhs.nulls_first,
58 })
59 }
60 _ => Self::Unordered,
61 }
62 }
63
64 pub fn sub(&self, rhs: &Self) -> Self {
65 match (self, rhs) {
66 (Self::Singleton, Self::Singleton) => Self::Singleton,
67 (Self::Singleton, Self::Ordered(rhs)) => Self::Ordered(SortOptions {
68 descending: !rhs.descending,
69 nulls_first: rhs.nulls_first,
70 }),
71 (_, Self::Singleton) => *self,
72 (Self::Ordered(lhs), Self::Ordered(rhs))
73 if lhs.descending != rhs.descending =>
74 {
75 Self::Ordered(SortOptions {
76 descending: lhs.descending,
77 nulls_first: lhs.nulls_first || rhs.nulls_first,
78 })
79 }
80 _ => Self::Unordered,
81 }
82 }
83
84 pub fn gt_or_gteq(&self, rhs: &Self) -> Self {
85 match (self, rhs) {
86 (Self::Singleton, Self::Ordered(rhs)) => Self::Ordered(SortOptions {
87 descending: !rhs.descending,
88 nulls_first: rhs.nulls_first,
89 }),
90 (_, Self::Singleton) => *self,
91 (Self::Ordered(lhs), Self::Ordered(rhs))
92 if lhs.descending != rhs.descending =>
93 {
94 *self
95 }
96 _ => Self::Unordered,
97 }
98 }
99
100 pub fn and_or(&self, rhs: &Self) -> Self {
101 match (self, rhs) {
102 (Self::Ordered(lhs), Self::Ordered(rhs))
103 if lhs.descending == rhs.descending =>
104 {
105 Self::Ordered(SortOptions {
106 descending: lhs.descending,
107 nulls_first: lhs.nulls_first || rhs.nulls_first,
108 })
109 }
110 (Self::Ordered(opt), Self::Singleton)
111 | (Self::Singleton, Self::Ordered(opt)) => Self::Ordered(SortOptions {
112 descending: opt.descending,
113 nulls_first: opt.nulls_first,
114 }),
115 (Self::Singleton, Self::Singleton) => Self::Singleton,
116 _ => Self::Unordered,
117 }
118 }
119}
120
121impl Neg for SortProperties {
122 type Output = Self;
123
124 fn neg(mut self) -> Self::Output {
125 if let SortProperties::Ordered(SortOptions { descending, .. }) = &mut self {
126 *descending = !*descending;
127 }
128 self
129 }
130}
131
132/// Represents the properties of a `PhysicalExpr`, including its sorting,
133/// range, and whether it preserves lexicographical ordering.
134#[derive(Debug, Clone)]
135pub struct ExprProperties {
136 /// Properties that describe the sorting behavior of the expression,
137 /// such as whether it is ordered, unordered, or a singleton value.
138 pub sort_properties: SortProperties,
139 /// A closed interval representing the range of possible values for
140 /// the expression. Used to compute reliable bounds.
141 pub range: Interval,
142 /// Indicates whether the expression preserves lexicographical ordering
143 /// of its inputs. For example, string concatenation preserves ordering,
144 /// while addition does not.
145 pub preserves_lex_ordering: bool,
146}
147
148impl ExprProperties {
149 /// Creates a new `ExprProperties` instance with unknown sort properties,
150 /// unknown range, and unknown lexicographical ordering preservation.
151 pub fn new_unknown() -> Self {
152 Self {
153 sort_properties: SortProperties::default(),
154 range: Interval::make_unbounded(&DataType::Null).unwrap(),
155 preserves_lex_ordering: false,
156 }
157 }
158
159 /// Sets the sorting properties of the expression and returns the modified instance.
160 pub fn with_order(mut self, order: SortProperties) -> Self {
161 self.sort_properties = order;
162 self
163 }
164
165 /// Sets the range of the expression and returns the modified instance.
166 pub fn with_range(mut self, range: Interval) -> Self {
167 self.range = range;
168 self
169 }
170
171 /// Sets whether the expression maintains lexicographical ordering and returns the modified instance.
172 pub fn with_preserves_lex_ordering(mut self, preserves_lex_ordering: bool) -> Self {
173 self.preserves_lex_ordering = preserves_lex_ordering;
174 self
175 }
176}