reddb_server/storage/query/planner/pathkeys.rs
1//! Pathkey tracking — Fase 4 P3 companion module.
2//!
3//! A `PathKey` describes an output-order guarantee that a plan
4//! operator makes to its consumer: "the rows coming out of this
5//! operator are sorted by column X ascending". The planner
6//! consumes pathkeys to pick optimisations like **incremental
7//! sort** (`sort.rs::incremental_sort_top_k`) and **merge
8//! join**: if a scan already returns rows in the order a sort
9//! needs, the sort collapses to a truncate or a group-by-prefix
10//! walk.
11//!
12//! Mirrors PG's `src/backend/optimizer/path/pathkeys.c` with
13//! three simplifications:
14//!
15//! - **No equivalence classes**: PG tracks equivalent columns
16//! via `EquivalenceClass` so `a.id = b.id` lets a sort on
17//! `a.id` satisfy a requirement for `b.id`. We track pathkeys
18//! per-column with no cross-relation equivalence today.
19//! - **No volatility class**: PG splits pathkeys by volatile
20//! function boundaries. reddb scalar functions are Volatile
21//! or Scalar (see function_catalog::FunctionKind) and the
22//! planner walks each operator node individually, so we
23//! don't need a separate class marker.
24//! - **No shared costs**: PG dedupes pathkeys across plans so
25//! the DP can compare them by identity. Ours are cheap
26//! Vec<PathKey> copies — optimize when the planner spends
27//! measurable time hashing them.
28//!
29//! This module is **not yet wired** into the planner. Wiring
30//! plugs into `planner/logical.rs` when a plan operator
31//! declares the order of its output — scans declare index
32//! order, merge-joins combine left+right pathkeys, sorts emit
33//! exactly their sort keys, and so on.
34
35use crate::storage::query::ast::FieldRef;
36
37/// Direction component of a pathkey — matches `OrderBy::Direction`
38/// but intentionally decoupled so the planner can reason about
39/// pathkeys without touching AST types.
40#[derive(Debug, Clone, Copy, PartialEq, Eq)]
41pub enum PathKeyDirection {
42 Ascending,
43 Descending,
44}
45
46/// Null placement within a pathkey — ascending pathkeys
47/// typically place nulls last; descending typically first.
48/// Stored explicitly because SQL's `NULLS FIRST` / `NULLS LAST`
49/// override the default.
50#[derive(Debug, Clone, Copy, PartialEq, Eq)]
51pub enum PathKeyNulls {
52 First,
53 Last,
54}
55
56/// One column in an ordering guarantee. A `PathKeys` is a
57/// Vec<PathKey> representing a lexicographic ordering — the
58/// first pathkey dominates, the next breaks ties, and so on.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub struct PathKey {
61 pub field: FieldRef,
62 pub direction: PathKeyDirection,
63 pub nulls: PathKeyNulls,
64}
65
66impl PathKey {
67 /// Create an ascending pathkey with NULLS LAST (the PG
68 /// default for ASC). Convenience constructor used by
69 /// `IndexScan` when the index is ascending.
70 pub fn asc(field: FieldRef) -> Self {
71 Self {
72 field,
73 direction: PathKeyDirection::Ascending,
74 nulls: PathKeyNulls::Last,
75 }
76 }
77
78 /// Create a descending pathkey with NULLS FIRST.
79 pub fn desc(field: FieldRef) -> Self {
80 Self {
81 field,
82 direction: PathKeyDirection::Descending,
83 nulls: PathKeyNulls::First,
84 }
85 }
86
87 /// Returns true when two pathkeys describe the same column
88 /// in the same direction. Null placement is NOT compared —
89 /// two pathkeys on the same column are equivalent for
90 /// incremental-sort purposes regardless of null placement
91 /// because sort stability handles the null edge case.
92 pub fn same_column_and_direction(&self, other: &PathKey) -> bool {
93 self.field == other.field && self.direction == other.direction
94 }
95}
96
97/// A list of pathkeys describing a plan operator's output
98/// order. The order is lexicographic: `keys[0]` is the primary
99/// sort key, `keys[1]` breaks ties, etc.
100#[derive(Debug, Clone, Default, PartialEq, Eq)]
101pub struct PathKeys {
102 pub keys: Vec<PathKey>,
103}
104
105impl PathKeys {
106 /// Empty pathkeys — the plan operator makes no order
107 /// guarantee.
108 pub fn unordered() -> Self {
109 Self { keys: Vec::new() }
110 }
111
112 /// Single-key pathkeys ordered by `field` ascending.
113 pub fn asc(field: FieldRef) -> Self {
114 Self {
115 keys: vec![PathKey::asc(field)],
116 }
117 }
118
119 /// Single-key pathkeys ordered by `field` descending.
120 pub fn desc(field: FieldRef) -> Self {
121 Self {
122 keys: vec![PathKey::desc(field)],
123 }
124 }
125
126 /// True when the operator emits rows with no guaranteed
127 /// order. Consumers that need a specific order must add a
128 /// Sort node above.
129 pub fn is_unordered(&self) -> bool {
130 self.keys.is_empty()
131 }
132
133 /// Returns the number of leading pathkeys that match
134 /// `required`. Used by the planner to decide whether to
135 /// emit an incremental sort: if `prefix_match` returns
136 /// ≥ 1, the input already satisfies a prefix of the
137 /// required order and an incremental sort is cheaper
138 /// than a full sort.
139 ///
140 /// Example:
141 /// - self: `[a ASC, b ASC]`
142 /// - required: `[a ASC, b ASC, c DESC]`
143 /// - returns: `2`
144 ///
145 /// - self: `[a ASC]`
146 /// - required: `[b ASC, a ASC]`
147 /// - returns: `0` (prefix doesn't match)
148 pub fn prefix_match(&self, required: &PathKeys) -> usize {
149 let mut matched = 0;
150 for (mine, req) in self.keys.iter().zip(required.keys.iter()) {
151 if mine.same_column_and_direction(req) {
152 matched += 1;
153 } else {
154 break;
155 }
156 }
157 matched
158 }
159
160 /// True when `self` fully satisfies `required` — i.e.
161 /// `self.prefix_match(required) == required.keys.len()`
162 /// AND `self` has at least as many keys as `required`.
163 /// Callers use this to decide whether a Sort node can be
164 /// elided entirely.
165 pub fn satisfies(&self, required: &PathKeys) -> bool {
166 if required.keys.len() > self.keys.len() {
167 return false;
168 }
169 self.prefix_match(required) == required.keys.len()
170 }
171
172 /// Append a pathkey, returning a new `PathKeys` with one
173 /// more column at the end. Used by the planner when
174 /// composing pathkeys from an index scan + a tiebreaker.
175 pub fn appended(&self, key: PathKey) -> Self {
176 let mut keys = self.keys.clone();
177 keys.push(key);
178 Self { keys }
179 }
180
181 /// Truncate to the first `n` keys. Used when a plan
182 /// operator only preserves a prefix of its input's order
183 /// (e.g. hash aggregation destroys ordering beyond the
184 /// grouping columns).
185 pub fn truncated(&self, n: usize) -> Self {
186 Self {
187 keys: self.keys.iter().take(n).cloned().collect(),
188 }
189 }
190
191 /// Number of pathkeys in the list.
192 pub fn len(&self) -> usize {
193 self.keys.len()
194 }
195
196 /// Is the pathkey list empty?
197 pub fn is_empty(&self) -> bool {
198 self.keys.is_empty()
199 }
200}
201
202/// Strategy hint returned by `plan_sort` — tells the runtime
203/// whether to use a full sort, an incremental sort, or skip
204/// the sort operator entirely because the input is already
205/// sorted.
206#[derive(Debug, Clone, Copy, PartialEq, Eq)]
207pub enum SortStrategy {
208 /// Input is already in the required order — no sort
209 /// needed. The planner can elide the Sort node.
210 None,
211 /// Input matches a prefix of the required order. Use
212 /// `incremental_sort_top_k` with the matching prefix
213 /// length.
214 Incremental { prefix_len: usize },
215 /// Input has no relevant order. Full sort required.
216 Full,
217}
218
219/// Decide how to sort an input with given `input_order` to
220/// satisfy `required_order`. Returns a strategy hint the
221/// planner can convert into a plan node.
222pub fn plan_sort(input_order: &PathKeys, required_order: &PathKeys) -> SortStrategy {
223 if required_order.is_unordered() {
224 return SortStrategy::None;
225 }
226 if input_order.satisfies(required_order) {
227 return SortStrategy::None;
228 }
229 let prefix = input_order.prefix_match(required_order);
230 if prefix == 0 {
231 return SortStrategy::Full;
232 }
233 SortStrategy::Incremental { prefix_len: prefix }
234}