optd_core/
nodes.rs

1// Copyright (c) 2023-2024 CMU Database Group
2//
3// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
4// https://opensource.org/licenses/MIT.
5
6//! The RelNode is the basic data structure of the optimizer. It is dynamically typed and is
7//! the internal representation of the plan nodes.
8
9use std::collections::HashMap;
10use std::fmt::{Debug, Display};
11use std::hash::Hash;
12use std::sync::Arc;
13
14use arrow_schema::DataType;
15use chrono::NaiveDate;
16use ordered_float::OrderedFloat;
17use serde::{Deserialize, Deserializer, Serialize, Serializer};
18
19use crate::cascades::GroupId;
20use crate::cost::{Cost, Statistics};
21
22#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
23pub struct SerializableOrderedF64(pub OrderedFloat<f64>);
24
25impl Serialize for SerializableOrderedF64 {
26    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
27    where
28        S: Serializer,
29    {
30        // Directly serialize the inner f64 value of the OrderedFloat
31        self.0 .0.serialize(serializer)
32    }
33}
34
35impl<'de> Deserialize<'de> for SerializableOrderedF64 {
36    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
37    where
38        D: Deserializer<'de>,
39    {
40        // Deserialize an f64 and wrap it in an OrderedFloat
41        let float = f64::deserialize(deserializer)?;
42        Ok(SerializableOrderedF64(OrderedFloat(float)))
43    }
44}
45
46// TODO: why not use arrow types here? Do we really need to define our own Value type?
47// Shouldn't we at least remove this from the core/engine?
48#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
49pub enum Value {
50    UInt8(u8),
51    UInt16(u16),
52    UInt32(u32),
53    UInt64(u64),
54    Int8(i8),
55    Int16(i16),
56    Int32(i32),
57    Int64(i64),
58    Int128(i128),
59    Float(SerializableOrderedF64),
60    String(Arc<str>),
61    Bool(bool),
62    Date32(i32),
63    Decimal128(i128),
64    Serialized(Arc<[u8]>),
65}
66
67impl std::fmt::Display for Value {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        match self {
70            Self::UInt8(x) => write!(f, "{x}(u8)"),
71            Self::UInt16(x) => write!(f, "{x}(u16)"),
72            Self::UInt32(x) => write!(f, "{x}(u32)"),
73            Self::UInt64(x) => write!(f, "{x}(u64)"),
74            Self::Int8(x) => write!(f, "{x}(i8)"),
75            Self::Int16(x) => write!(f, "{x}(i16)"),
76            Self::Int32(x) => write!(f, "{x}(i32)"),
77            Self::Int64(x) => write!(f, "{x}(i64)"),
78            Self::Int128(x) => write!(f, "{x}(i128)"),
79            Self::Float(x) => write!(f, "{}(float)", x.0),
80            Self::String(x) => write!(f, "\"{x}\""),
81            Self::Bool(x) => write!(f, "{x}"),
82            Self::Date32(x) => write!(f, "{x}(date32)"),
83            Self::Decimal128(x) => write!(f, "{x}(decimal128)"),
84            Self::Serialized(x) => write!(f, "<len:{}>", x.len()),
85        }
86    }
87}
88
89/// The `as_*()` functions do not perform conversions. This is *unlike* the `as`
90/// keyword in rust.
91///
92/// If you want to perform conversions, use the `to_*()` functions.
93impl Value {
94    pub fn as_u8(&self) -> u8 {
95        match self {
96            Value::UInt8(i) => *i,
97            _ => panic!("Value is not an u8"),
98        }
99    }
100
101    pub fn as_u16(&self) -> u16 {
102        match self {
103            Value::UInt16(i) => *i,
104            _ => panic!("Value is not an u16"),
105        }
106    }
107
108    pub fn as_u32(&self) -> u32 {
109        match self {
110            Value::UInt32(i) => *i,
111            _ => panic!("Value is not an u32"),
112        }
113    }
114
115    pub fn as_u64(&self) -> u64 {
116        match self {
117            Value::UInt64(i) => *i,
118            _ => panic!("Value is not an u64"),
119        }
120    }
121
122    pub fn as_i8(&self) -> i8 {
123        match self {
124            Value::Int8(i) => *i,
125            _ => panic!("Value is not an i8"),
126        }
127    }
128
129    pub fn as_i16(&self) -> i16 {
130        match self {
131            Value::Int16(i) => *i,
132            _ => panic!("Value is not an i16"),
133        }
134    }
135
136    pub fn as_i32(&self) -> i32 {
137        match self {
138            Value::Int32(i) => *i,
139            _ => panic!("Value is not an i32"),
140        }
141    }
142
143    pub fn as_i64(&self) -> i64 {
144        match self {
145            Value::Int64(i) => *i,
146            _ => panic!("Value is not an i64"),
147        }
148    }
149
150    pub fn as_i128(&self) -> i128 {
151        match self {
152            Value::Int128(i) => *i,
153            _ => panic!("Value is not an i128"),
154        }
155    }
156
157    pub fn as_f64(&self) -> f64 {
158        match self {
159            Value::Float(i) => *i.0,
160            _ => panic!("Value is not an f64"),
161        }
162    }
163
164    pub fn as_bool(&self) -> bool {
165        match self {
166            Value::Bool(i) => *i,
167            _ => panic!("Value is not a bool"),
168        }
169    }
170
171    pub fn as_str(&self) -> Arc<str> {
172        match self {
173            Value::String(i) => i.clone(),
174            _ => panic!("Value is not a string"),
175        }
176    }
177
178    pub fn as_slice(&self) -> Arc<[u8]> {
179        match self {
180            Value::Serialized(i) => i.clone(),
181            _ => panic!("Value is not a serialized"),
182        }
183    }
184
185    pub fn convert_to_type(&self, typ: DataType) -> Value {
186        match typ {
187            DataType::Int32 => Value::Int32(match self {
188                Value::Int32(i32) => *i32,
189                Value::Int64(i64) => (*i64).try_into().unwrap(),
190                _ => panic!("{self} could not be converted into an Int32"),
191            }),
192            DataType::Int64 => Value::Int64(match self {
193                Value::Int64(i64) => *i64,
194                Value::Int32(i32) => (*i32).into(),
195                _ => panic!("{self} could not be converted into an Int64"),
196            }),
197            DataType::UInt64 => Value::UInt64(match self {
198                Value::Int64(i64) => (*i64).try_into().unwrap(),
199                Value::UInt64(i64) => *i64,
200                Value::UInt32(i32) => (*i32).into(),
201                _ => panic!("{self} could not be converted into an UInt64"),
202            }),
203            DataType::Date32 => Value::Date32(match self {
204                Value::Date32(date32) => *date32,
205                Value::String(str) => {
206                    let date = NaiveDate::parse_from_str(str, "%Y-%m-%d").unwrap();
207                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
208                    let duration_since_epoch = date.signed_duration_since(epoch);
209                    let days_since_epoch: i32 = duration_since_epoch.num_days() as i32;
210                    days_since_epoch
211                }
212                _ => panic!("{self} could not be converted into an Date32"),
213            }),
214            _ => unimplemented!("Have not implemented convert_to_type for DataType {typ}"),
215        }
216    }
217}
218
219pub trait NodeType:
220    PartialEq + Eq + Hash + Clone + 'static + Display + Debug + Send + Sync
221{
222    type PredType: PartialEq + Eq + Hash + Clone + 'static + Display + Debug + Send + Sync;
223
224    fn is_logical(&self) -> bool;
225}
226
227/// A pointer to a plan node
228pub type ArcPlanNode<T> = Arc<PlanNode<T>>;
229
230/// A pointer to a predicate node
231pub type ArcPredNode<T> = Arc<PredNode<T>>;
232
233#[derive(Clone, Debug, Hash, PartialEq, Eq)]
234pub enum PlanNodeOrGroup<T: NodeType> {
235    PlanNode(ArcPlanNode<T>),
236    Group(GroupId),
237}
238
239impl<T: NodeType> PlanNodeOrGroup<T> {
240    pub fn is_materialized(&self) -> bool {
241        match self {
242            PlanNodeOrGroup::PlanNode(_) => true,
243            PlanNodeOrGroup::Group(_) => false,
244        }
245    }
246
247    pub fn unwrap_typ(&self) -> T {
248        self.unwrap_plan_node().typ.clone()
249    }
250
251    pub fn unwrap_plan_node(&self) -> ArcPlanNode<T> {
252        match self {
253            PlanNodeOrGroup::PlanNode(node) => node.clone(),
254            PlanNodeOrGroup::Group(_) => panic!("Expected PlanNode, found Group"),
255        }
256    }
257
258    pub fn unwrap_group(&self) -> GroupId {
259        match self {
260            PlanNodeOrGroup::PlanNode(_) => panic!("Expected Group, found PlanNode"),
261            PlanNodeOrGroup::Group(group_id) => *group_id,
262        }
263    }
264}
265
266impl<T: NodeType> std::fmt::Display for PlanNodeOrGroup<T> {
267    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
268        match self {
269            PlanNodeOrGroup::PlanNode(node) => write!(f, "{}", node),
270            PlanNodeOrGroup::Group(group_id) => write!(f, "{}", group_id),
271        }
272    }
273}
274
275#[derive(Clone, Debug, Hash, PartialEq, Eq)]
276pub struct PlanNode<T: NodeType> {
277    /// A generic plan node type
278    pub typ: T,
279    /// Child plan nodes, which may be materialized or placeholder group IDs
280    /// based on how this node was initialized
281    pub children: Vec<PlanNodeOrGroup<T>>,
282    /// Predicate nodes, which are always materialized
283    pub predicates: Vec<ArcPredNode<T>>,
284}
285
286impl<T: NodeType> std::fmt::Display for PlanNode<T> {
287    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
288        write!(f, "({}", self.typ)?;
289        for child in &self.children {
290            write!(f, " {}", child)?;
291        }
292        write!(f, ")")
293    }
294}
295
296impl<T: NodeType> PlanNode<T> {
297    pub fn child(&self, idx: usize) -> PlanNodeOrGroup<T> {
298        self.children[idx].clone()
299    }
300
301    pub fn child_rel(&self, idx: usize) -> ArcPlanNode<T> {
302        self.child(idx).unwrap_plan_node()
303    }
304
305    pub fn predicate(&self, idx: usize) -> ArcPredNode<T> {
306        self.predicates[idx].clone()
307    }
308}
309
310impl<T: NodeType> From<PlanNode<T>> for PlanNodeOrGroup<T> {
311    fn from(value: PlanNode<T>) -> Self {
312        Self::PlanNode(value.into())
313    }
314}
315
316impl<T: NodeType> From<ArcPlanNode<T>> for PlanNodeOrGroup<T> {
317    fn from(value: ArcPlanNode<T>) -> Self {
318        Self::PlanNode(value)
319    }
320}
321
322impl<T: NodeType> From<GroupId> for PlanNodeOrGroup<T> {
323    fn from(value: GroupId) -> Self {
324        Self::Group(value)
325    }
326}
327
328#[derive(Clone, Debug, Hash, PartialEq, Eq)]
329pub struct PredNode<T: NodeType> {
330    /// A generic predicate node type
331    pub typ: T::PredType,
332    /// Child predicate nodes, always materialized
333    pub children: Vec<ArcPredNode<T>>,
334    /// Data associated with the predicate, if any
335    pub data: Option<Value>,
336}
337
338impl<T: NodeType> std::fmt::Display for PredNode<T> {
339    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
340        write!(f, "({}", self.typ)?;
341        for child in &self.children {
342            write!(f, " {}", child)?;
343        }
344        if let Some(data) = &self.data {
345            write!(f, " {}", data)?;
346        }
347        write!(f, ")")
348    }
349}
350impl<T: NodeType> PredNode<T> {
351    pub fn child(&self, idx: usize) -> ArcPredNode<T> {
352        self.children[idx].clone()
353    }
354
355    pub fn unwrap_data(&self) -> Value {
356        self.data.clone().unwrap()
357    }
358}
359
360/// Metadata for a rel node.
361#[derive(Clone)]
362pub struct PlanNodeMeta {
363    /// The group (id) of the `RelNode`
364    pub group_id: GroupId,
365    /// Weighted cost of the `RelNode`
366    pub weighted_cost: f64,
367    /// Cost of the `RelNode`
368    pub cost: Cost,
369    /// Statistics
370    pub stat: Arc<Statistics>,
371    /// Cost in display string
372    /// TODO: this should be lazily processed and generated
373    pub cost_display: String,
374    /// Statistics in display string
375    /// TODO: this should be lazily processed and generated
376    pub stat_display: String,
377}
378
379impl PlanNodeMeta {
380    pub fn new(
381        group_id: GroupId,
382        weighted_cost: f64,
383        cost: Cost,
384        stat: Arc<Statistics>,
385        cost_display: String,
386        stat_display: String,
387    ) -> Self {
388        Self {
389            group_id,
390            weighted_cost,
391            cost,
392            stat,
393            cost_display,
394            stat_display,
395        }
396    }
397}
398
399/// A hash table storing `RelNode` (memory address, metadata) pairs.
400pub type PlanNodeMetaMap = HashMap<usize, PlanNodeMeta>;