Skip to main content

reddb_rql/planner/
pathkeys.rs

1//! Pathkey tracking — Fase 4 P3 companion module.
2//!
3//! A `PathKey` describes an output-order guarantee that a plan
4//! operator makes to its consumer: "the rows coming out of this
5//! operator are sorted by column X ascending". The planner
6//! consumes pathkeys to pick optimisations like **incremental
7//! sort** (`sort.rs::incremental_sort_top_k`) and **merge
8//! join**: if a scan already returns rows in the order a sort
9//! needs, the sort collapses to a truncate or a group-by-prefix
10//! walk.
11//!
12//! Mirrors PG's `src/backend/optimizer/path/pathkeys.c` with
13//! three simplifications:
14//!
15//! - **No equivalence classes**: PG tracks equivalent columns
16//!   via `EquivalenceClass` so `a.id = b.id` lets a sort on
17//!   `a.id` satisfy a requirement for `b.id`. We track pathkeys
18//!   per-column with no cross-relation equivalence today.
19//! - **No volatility class**: PG splits pathkeys by volatile
20//!   function boundaries. reddb scalar functions are Volatile
21//!   or Scalar (see function_catalog::FunctionKind) and the
22//!   planner walks each operator node individually, so we
23//!   don't need a separate class marker.
24//! - **No shared costs**: PG dedupes pathkeys across plans so
25//!   the DP can compare them by identity. Ours are cheap
26//!   Vec<PathKey> copies — optimize when the planner spends
27//!   measurable time hashing them.
28//!
29//! This module is **not yet wired** into the planner. Wiring
30//! plugs into `planner/logical.rs` when a plan operator
31//! declares the order of its output — scans declare index
32//! order, merge-joins combine left+right pathkeys, sorts emit
33//! exactly their sort keys, and so on.
34
35use crate::ast::FieldRef;
36
37/// Direction component of a pathkey — matches `OrderBy::Direction`
38/// but intentionally decoupled so the planner can reason about
39/// pathkeys without touching AST types.
40#[derive(Debug, Clone, Copy, PartialEq, Eq)]
41pub enum PathKeyDirection {
42    Ascending,
43    Descending,
44}
45
46/// Null placement within a pathkey — ascending pathkeys
47/// typically place nulls last; descending typically first.
48/// Stored explicitly because SQL's `NULLS FIRST` / `NULLS LAST`
49/// override the default.
50#[derive(Debug, Clone, Copy, PartialEq, Eq)]
51pub enum PathKeyNulls {
52    First,
53    Last,
54}
55
56/// One column in an ordering guarantee. A `PathKeys` is a
57/// Vec<PathKey> representing a lexicographic ordering — the
58/// first pathkey dominates, the next breaks ties, and so on.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct PathKey {
61    pub field: FieldRef,
62    pub direction: PathKeyDirection,
63    pub nulls: PathKeyNulls,
64}
65
66impl PathKey {
67    /// Create an ascending pathkey with NULLS LAST (the PG
68    /// default for ASC). Convenience constructor used by
69    /// `IndexScan` when the index is ascending.
70    pub fn asc(field: FieldRef) -> Self {
71        Self {
72            field,
73            direction: PathKeyDirection::Ascending,
74            nulls: PathKeyNulls::Last,
75        }
76    }
77
78    /// Create a descending pathkey with NULLS FIRST.
79    pub fn desc(field: FieldRef) -> Self {
80        Self {
81            field,
82            direction: PathKeyDirection::Descending,
83            nulls: PathKeyNulls::First,
84        }
85    }
86
87    /// Returns true when two pathkeys describe the same column
88    /// in the same direction. Null placement is NOT compared —
89    /// two pathkeys on the same column are equivalent for
90    /// incremental-sort purposes regardless of null placement
91    /// because sort stability handles the null edge case.
92    pub fn same_column_and_direction(&self, other: &PathKey) -> bool {
93        self.field == other.field && self.direction == other.direction
94    }
95}
96
97/// A list of pathkeys describing a plan operator's output
98/// order. The order is lexicographic: `keys[0]` is the primary
99/// sort key, `keys[1]` breaks ties, etc.
100#[derive(Debug, Clone, Default, PartialEq, Eq)]
101pub struct PathKeys {
102    pub keys: Vec<PathKey>,
103}
104
105impl PathKeys {
106    /// Empty pathkeys — the plan operator makes no order
107    /// guarantee.
108    pub fn unordered() -> Self {
109        Self { keys: Vec::new() }
110    }
111
112    /// Single-key pathkeys ordered by `field` ascending.
113    pub fn asc(field: FieldRef) -> Self {
114        Self {
115            keys: vec![PathKey::asc(field)],
116        }
117    }
118
119    /// Single-key pathkeys ordered by `field` descending.
120    pub fn desc(field: FieldRef) -> Self {
121        Self {
122            keys: vec![PathKey::desc(field)],
123        }
124    }
125
126    /// True when the operator emits rows with no guaranteed
127    /// order. Consumers that need a specific order must add a
128    /// Sort node above.
129    pub fn is_unordered(&self) -> bool {
130        self.keys.is_empty()
131    }
132
133    /// Returns the number of leading pathkeys that match
134    /// `required`. Used by the planner to decide whether to
135    /// emit an incremental sort: if `prefix_match` returns
136    /// ≥ 1, the input already satisfies a prefix of the
137    /// required order and an incremental sort is cheaper
138    /// than a full sort.
139    ///
140    /// Example:
141    /// - self:     `[a ASC, b ASC]`
142    /// - required: `[a ASC, b ASC, c DESC]`
143    /// - returns:  `2`
144    ///
145    /// - self:     `[a ASC]`
146    /// - required: `[b ASC, a ASC]`
147    /// - returns:  `0` (prefix doesn't match)
148    pub fn prefix_match(&self, required: &PathKeys) -> usize {
149        let mut matched = 0;
150        for (mine, req) in self.keys.iter().zip(required.keys.iter()) {
151            if mine.same_column_and_direction(req) {
152                matched += 1;
153            } else {
154                break;
155            }
156        }
157        matched
158    }
159
160    /// True when `self` fully satisfies `required` — i.e.
161    /// `self.prefix_match(required) == required.keys.len()`
162    /// AND `self` has at least as many keys as `required`.
163    /// Callers use this to decide whether a Sort node can be
164    /// elided entirely.
165    pub fn satisfies(&self, required: &PathKeys) -> bool {
166        if required.keys.len() > self.keys.len() {
167            return false;
168        }
169        self.prefix_match(required) == required.keys.len()
170    }
171
172    /// Append a pathkey, returning a new `PathKeys` with one
173    /// more column at the end. Used by the planner when
174    /// composing pathkeys from an index scan + a tiebreaker.
175    pub fn appended(&self, key: PathKey) -> Self {
176        let mut keys = self.keys.clone();
177        keys.push(key);
178        Self { keys }
179    }
180
181    /// Truncate to the first `n` keys. Used when a plan
182    /// operator only preserves a prefix of its input's order
183    /// (e.g. hash aggregation destroys ordering beyond the
184    /// grouping columns).
185    pub fn truncated(&self, n: usize) -> Self {
186        Self {
187            keys: self.keys.iter().take(n).cloned().collect(),
188        }
189    }
190
191    /// Number of pathkeys in the list.
192    pub fn len(&self) -> usize {
193        self.keys.len()
194    }
195
196    /// Is the pathkey list empty?
197    pub fn is_empty(&self) -> bool {
198        self.keys.is_empty()
199    }
200}
201
202/// Strategy hint returned by `plan_sort` — tells the runtime
203/// whether to use a full sort, an incremental sort, or skip
204/// the sort operator entirely because the input is already
205/// sorted.
206#[derive(Debug, Clone, Copy, PartialEq, Eq)]
207pub enum SortStrategy {
208    /// Input is already in the required order — no sort
209    /// needed. The planner can elide the Sort node.
210    None,
211    /// Input matches a prefix of the required order. Use
212    /// `incremental_sort_top_k` with the matching prefix
213    /// length.
214    Incremental { prefix_len: usize },
215    /// Input has no relevant order. Full sort required.
216    Full,
217}
218
219/// Decide how to sort an input with given `input_order` to
220/// satisfy `required_order`. Returns a strategy hint the
221/// planner can convert into a plan node.
222pub fn plan_sort(input_order: &PathKeys, required_order: &PathKeys) -> SortStrategy {
223    if required_order.is_unordered() {
224        return SortStrategy::None;
225    }
226    if input_order.satisfies(required_order) {
227        return SortStrategy::None;
228    }
229    let prefix = input_order.prefix_match(required_order);
230    if prefix == 0 {
231        return SortStrategy::Full;
232    }
233    SortStrategy::Incremental { prefix_len: prefix }
234}
235
236#[cfg(test)]
237mod tests {
238    use super::*;
239
240    fn col(name: &str) -> FieldRef {
241        FieldRef::column("", name)
242    }
243
244    #[test]
245    fn constructors_apply_sql_default_null_placement() {
246        let asc = PathKey::asc(col("created_at"));
247        assert_eq!(asc.direction, PathKeyDirection::Ascending);
248        assert_eq!(asc.nulls, PathKeyNulls::Last);
249
250        let desc = PathKey::desc(col("score"));
251        assert_eq!(desc.direction, PathKeyDirection::Descending);
252        assert_eq!(desc.nulls, PathKeyNulls::First);
253    }
254
255    #[test]
256    fn prefix_matching_ignores_null_placement_but_not_direction_or_column() {
257        let input = PathKeys {
258            keys: vec![
259                PathKey {
260                    field: col("tenant_id"),
261                    direction: PathKeyDirection::Ascending,
262                    nulls: PathKeyNulls::First,
263                },
264                PathKey::desc(col("created_at")),
265            ],
266        };
267        let required = PathKeys {
268            keys: vec![
269                PathKey::asc(col("tenant_id")),
270                PathKey::desc(col("created_at")),
271                PathKey::asc(col("id")),
272            ],
273        };
274
275        assert_eq!(input.prefix_match(&required), 2);
276
277        let wrong_direction = PathKeys::desc(col("tenant_id"));
278        assert_eq!(input.prefix_match(&wrong_direction), 0);
279
280        let wrong_column = PathKeys::asc(col("account_id"));
281        assert_eq!(input.prefix_match(&wrong_column), 0);
282    }
283
284    #[test]
285    fn satisfies_requires_all_required_keys_in_order() {
286        let input = PathKeys::asc(col("a"))
287            .appended(PathKey::asc(col("b")))
288            .appended(PathKey::desc(col("c")));
289
290        assert!(input.satisfies(&PathKeys::asc(col("a"))));
291        assert!(input.satisfies(&PathKeys::asc(col("a")).appended(PathKey::asc(col("b")))));
292        assert!(!PathKeys::asc(col("a")).satisfies(&input));
293        assert!(!input.satisfies(&PathKeys::asc(col("b"))));
294    }
295
296    #[test]
297    fn appended_truncated_and_empty_report_stable_lengths() {
298        let unordered = PathKeys::unordered();
299        assert!(unordered.is_unordered());
300        assert!(unordered.is_empty());
301        assert_eq!(unordered.len(), 0);
302
303        let keys = unordered
304            .appended(PathKey::asc(col("a")))
305            .appended(PathKey::desc(col("b")));
306        assert_eq!(keys.len(), 2);
307        assert_eq!(keys.truncated(1), PathKeys::asc(col("a")));
308        assert_eq!(keys.truncated(99), keys);
309    }
310
311    #[test]
312    fn plan_sort_distinguishes_none_incremental_and_full() {
313        let input = PathKeys::asc(col("tenant_id")).appended(PathKey::desc(col("created_at")));
314        let exact = input.clone();
315        let extension = exact.clone().appended(PathKey::asc(col("id")));
316        let unrelated = PathKeys::asc(col("status"));
317
318        assert_eq!(
319            plan_sort(&input, &PathKeys::unordered()),
320            SortStrategy::None
321        );
322        assert_eq!(plan_sort(&input, &exact), SortStrategy::None);
323        assert_eq!(
324            plan_sort(&input, &extension),
325            SortStrategy::Incremental { prefix_len: 2 }
326        );
327        assert_eq!(plan_sort(&input, &unrelated), SortStrategy::Full);
328    }
329}