datafusion_expr_common/
sort_properties.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::ops::Neg;
19
20use crate::interval_arithmetic::Interval;
21
22use arrow::compute::SortOptions;
23use arrow::datatypes::DataType;
24
25/// To propagate [`SortOptions`] across the `PhysicalExpr`, it is insufficient
26/// to simply use `Option<SortOptions>`: There must be a differentiation between
27/// unordered columns and literal values, since literals may not break the ordering
28/// when they are used as a child of some binary expression when the other child has
29/// some ordering. On the other hand, unordered columns cannot maintain ordering when
30/// they take part in such operations.
31///
32/// Example: ((a_ordered + b_unordered) + c_ordered) expression cannot end up with
33/// sorted data; however the ((a_ordered + 999) + c_ordered) expression can. Therefore,
34/// we need two different variants for literals and unordered columns as literals are
35/// often more ordering-friendly under most mathematical operations.
36#[derive(PartialEq, Debug, Clone, Copy, Default)]
37pub enum SortProperties {
38    /// Use the ordinary [`SortOptions`] struct to represent ordered data:
39    Ordered(SortOptions),
40    // This alternative represents unordered data:
41    #[default]
42    Unordered,
43    // Singleton is used for single-valued literal numbers:
44    Singleton,
45}
46
47impl SortProperties {
48    pub fn add(&self, rhs: &Self) -> Self {
49        match (self, rhs) {
50            (Self::Singleton, _) => *rhs,
51            (_, Self::Singleton) => *self,
52            (Self::Ordered(lhs), Self::Ordered(rhs))
53                if lhs.descending == rhs.descending =>
54            {
55                Self::Ordered(SortOptions {
56                    descending: lhs.descending,
57                    nulls_first: lhs.nulls_first || rhs.nulls_first,
58                })
59            }
60            _ => Self::Unordered,
61        }
62    }
63
64    pub fn sub(&self, rhs: &Self) -> Self {
65        match (self, rhs) {
66            (Self::Singleton, Self::Singleton) => Self::Singleton,
67            (Self::Singleton, Self::Ordered(rhs)) => Self::Ordered(SortOptions {
68                descending: !rhs.descending,
69                nulls_first: rhs.nulls_first,
70            }),
71            (_, Self::Singleton) => *self,
72            (Self::Ordered(lhs), Self::Ordered(rhs))
73                if lhs.descending != rhs.descending =>
74            {
75                Self::Ordered(SortOptions {
76                    descending: lhs.descending,
77                    nulls_first: lhs.nulls_first || rhs.nulls_first,
78                })
79            }
80            _ => Self::Unordered,
81        }
82    }
83
84    pub fn gt_or_gteq(&self, rhs: &Self) -> Self {
85        match (self, rhs) {
86            (Self::Singleton, Self::Ordered(rhs)) => Self::Ordered(SortOptions {
87                descending: !rhs.descending,
88                nulls_first: rhs.nulls_first,
89            }),
90            (_, Self::Singleton) => *self,
91            (Self::Ordered(lhs), Self::Ordered(rhs))
92                if lhs.descending != rhs.descending =>
93            {
94                *self
95            }
96            _ => Self::Unordered,
97        }
98    }
99
100    pub fn and_or(&self, rhs: &Self) -> Self {
101        match (self, rhs) {
102            (Self::Ordered(lhs), Self::Ordered(rhs))
103                if lhs.descending == rhs.descending =>
104            {
105                Self::Ordered(SortOptions {
106                    descending: lhs.descending,
107                    nulls_first: lhs.nulls_first || rhs.nulls_first,
108                })
109            }
110            (Self::Ordered(opt), Self::Singleton)
111            | (Self::Singleton, Self::Ordered(opt)) => Self::Ordered(SortOptions {
112                descending: opt.descending,
113                nulls_first: opt.nulls_first,
114            }),
115            (Self::Singleton, Self::Singleton) => Self::Singleton,
116            _ => Self::Unordered,
117        }
118    }
119}
120
121impl Neg for SortProperties {
122    type Output = Self;
123
124    fn neg(mut self) -> Self::Output {
125        if let SortProperties::Ordered(SortOptions { descending, .. }) = &mut self {
126            *descending = !*descending;
127        }
128        self
129    }
130}
131
132/// Represents the properties of a `PhysicalExpr`, including its sorting,
133/// range, and whether it preserves lexicographical ordering.
134#[derive(Debug, Clone)]
135pub struct ExprProperties {
136    /// Properties that describe the sorting behavior of the expression,
137    /// such as whether it is ordered, unordered, or a singleton value.
138    pub sort_properties: SortProperties,
139    /// A closed interval representing the range of possible values for
140    /// the expression. Used to compute reliable bounds.
141    pub range: Interval,
142    /// Indicates whether the expression preserves lexicographical ordering
143    /// of its inputs. For example, string concatenation preserves ordering,
144    /// while addition does not.
145    pub preserves_lex_ordering: bool,
146}
147
148impl ExprProperties {
149    /// Creates a new `ExprProperties` instance with unknown sort properties,
150    /// unknown range, and unknown lexicographical ordering preservation.
151    pub fn new_unknown() -> Self {
152        Self {
153            sort_properties: SortProperties::default(),
154            range: Interval::make_unbounded(&DataType::Null).unwrap(),
155            preserves_lex_ordering: false,
156        }
157    }
158
159    /// Sets the sorting properties of the expression and returns the modified instance.
160    pub fn with_order(mut self, order: SortProperties) -> Self {
161        self.sort_properties = order;
162        self
163    }
164
165    /// Sets the range of the expression and returns the modified instance.
166    pub fn with_range(mut self, range: Interval) -> Self {
167        self.range = range;
168        self
169    }
170
171    /// Sets whether the expression maintains lexicographical ordering and returns the modified instance.
172    pub fn with_preserves_lex_ordering(mut self, preserves_lex_ordering: bool) -> Self {
173        self.preserves_lex_ordering = preserves_lex_ordering;
174        self
175    }
176}