otter_sql/
ic.rs

1//! Intermediate representation (IR) and instruction set for an SQL database.
2
3use fmt_derive::{Debug, Display};
4
5use sqlparser::ast::{ColumnOptionDef, DataType};
6
7use crate::{
8    expr::Expr,
9    identifier::{SchemaRef, TableRef},
10    value::Value,
11    vm::RegisterIndex,
12    BoundedString,
13};
14
15#[derive(Debug, Clone)]
16/// The intermediate representation of a query. Made of up [`Instruction`]s.
17pub struct IntermediateCode {
18    pub instrs: Vec<Instruction>,
19}
20
21/// The instruction set of OtterSQL.
22#[derive(Display, Debug, Clone, PartialEq)]
23pub enum Instruction {
24    /// Load a [`Value`] into a register.
25    Value { index: RegisterIndex, value: Value },
26
27    /// Load a [`Expr`] into a register.
28    Expr { index: RegisterIndex, expr: Expr },
29
30    /// Load an *existing* table given by `name`.
31    ///
32    /// This will result in a [`Register::TableRef`](`crate::vm::Register::TableRef`) being stored at the
33    /// given register.
34    Source {
35        index: RegisterIndex,
36        name: TableRef,
37    },
38
39    /// Create a new empty [`Register::TableRef`](`crate::vm::Register::TableRef`).
40    Empty { index: RegisterIndex },
41
42    /// Create a new [`Register::TableRef](`crate::vm::Register::TableRef) pointing to a
43    /// non-existent table.
44    NonExistent { index: RegisterIndex },
45
46    /// Filter the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` using the given expression.
47    ///
48    /// This represents a `WHERE` clause of a `SELECT` statement in SQL.
49    Filter { index: RegisterIndex, expr: Expr },
50
51    /// Create a projection of the columns of the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `input`.
52    ///
53    /// The resultant column is added to the [`Register::TableRef`](`crate::vm::Register::TableRef`)
54    /// at `output`. It must be either an empty table or a table with the same number of rows.
55    ///
56    /// This represents the column list of the `SELECT` statement in SQL.
57    Project {
58        input: RegisterIndex,
59        output: RegisterIndex,
60        expr: Expr,
61        #[display(
62            "{}",
63            match alias {
64                None => "None".to_owned(),
65                Some(alias) => format!("{}", alias)
66            }
67        )]
68        alias: Option<BoundedString>,
69    },
70
71    /// Group the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` by the given expression.
72    ///
73    /// This will result in a [`Register::GroupedTable`](`crate::vm::Register::GroupedTable`) being stored at the `index` register.
74    ///
75    /// Must be added before any projections so as to catch errors in column selections.
76    GroupBy { index: RegisterIndex, expr: Expr },
77
78    /// Order the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` by the given expression.
79    ///
80    /// This represents the `ORDER BY` clause in SQL.
81    Order {
82        index: RegisterIndex,
83        expr: Expr,
84        ascending: bool,
85    },
86
87    /// Truncate the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` to the given number of rows.
88    ///
89    /// This represents the `LIMIT` clause in SQL.
90    Limit { index: RegisterIndex, limit: u64 },
91
92    /// Return from register at `index`.
93    ///
94    /// Some values stored in a register may be intermediate values and cannot be returned.
95    /// See [`Register`](`crate::vm::Register`) for more information.
96    Return { index: RegisterIndex },
97
98    /// Create a new schema.
99    ///
100    /// This represents a `CREATE SCHEMA [IF NOT EXISTS]` statement.
101    NewSchema {
102        schema_name: SchemaRef,
103        /// If `true`, the schema is not created if it exists and no error is returned.
104        exists_ok: bool,
105    },
106
107    /// Start defining a  new column and store the temporary metadata in register `index`.
108    ///
109    /// The value stored in the register will be of type [`Register::Column`](`crate::vm::Register::Column`).
110    ColumnDef {
111        index: RegisterIndex,
112        /// The column name.
113        name: BoundedString,
114        data_type: DataType,
115    },
116
117    /// Add an option or constraint to the [`Column`](`crate::vm::Register::Column`) definition in register `index`.
118    AddColumnOption {
119        index: RegisterIndex,
120        option: ColumnOptionDef,
121    },
122
123    /// Add column in register `col_index` to the [`Register::TableRef`](`crate::vm::Register::TableRef`) in `table_reg_index`.
124    AddColumn {
125        table_reg_index: RegisterIndex,
126        col_index: RegisterIndex,
127    },
128
129    /// Create table from the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
130    ///
131    /// Creation implies that the table is added to the schema.
132    ///
133    /// This represents a `CREATE TABLE [IF NOT EXISTS]` statement.
134    NewTable {
135        index: RegisterIndex,
136        name: TableRef,
137        /// If `true`, the table is not created if it exists and no error is returned.
138        exists_ok: bool,
139    },
140
141    /// Drop the table referenced by the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
142    DropTable { index: RegisterIndex },
143
144    /// Remove the given column from the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
145    RemoveColumn {
146        index: RegisterIndex,
147        col_name: BoundedString,
148    },
149
150    /// Rename an existing column from the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
151    RenameColumn {
152        index: RegisterIndex,
153        old_name: BoundedString,
154        new_name: BoundedString,
155    },
156
157    /// Start a new insertion into the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `view_index`.
158    ///
159    /// A [`Register::InsertDef`](`crate::vm::Register::InsertDef`) is stored in register `index`.
160    InsertDef {
161        table_reg_index: RegisterIndex,
162        index: RegisterIndex,
163    },
164
165    /// Add a column to the [`Register::InsertDef`](`crate::vm::Register::InsertDef`) in register `index`.
166    ColumnInsertDef {
167        insert_index: RegisterIndex,
168        col_name: BoundedString,
169    },
170
171    /// Start defining a new row of data to be inserted into the [`Register::InsertDef`](`crate::vm::Register::InsertDef`) in register `insert_index`.
172    ///
173    /// The value stored in the register `index` will be of type [`Register::InsertRow`](`crate::vm::Register::InsertRow`).
174    RowDef {
175        insert_index: RegisterIndex,
176        row_index: RegisterIndex,
177    },
178
179    /// Add a value to the [`Register::InsertRow`](`crate::vm::Register::InsertRow`) in register `index`.
180    AddValue {
181        row_index: RegisterIndex,
182        expr: Expr,
183    },
184
185    /// Perform insertion defined in the [`Register::InsertRow`](`crate::vm::Register::InsertRow`) in register `index`.
186    ///
187    /// This represents an `INSERT INTO` statement.
188    Insert { index: RegisterIndex },
189
190    /// Update values of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
191    ///
192    /// This represents an `UPDATE` statement.
193    Update {
194        index: RegisterIndex,
195        col: Expr,
196        expr: Expr,
197    },
198
199    /// Perform a union of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input1` and the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input2`.
200    ///
201    /// The output is stored as a [`Register::TableRef`](`crate::vm::Register::TableRef`) in register
202    /// `output`.
203    Union {
204        input1: RegisterIndex,
205        input2: RegisterIndex,
206        output: RegisterIndex,
207    },
208
209    /// Perform a cartesian join of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input1` and the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input2`.
210    ///
211    /// The output is stored as a [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `output`.
212    CrossJoin {
213        input1: RegisterIndex,
214        input2: RegisterIndex,
215        output: RegisterIndex,
216    },
217
218    /// Perform a natural join of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input1` and the [`Register::TableRef](`crate::vm::Register::TableRef) in register `input2`.
219    ///
220    /// The output is stored as a [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `output`.
221    ///
222    /// Note: this is both a left and a right join i.e., there will be `NULL`s where the common
223    /// columns do not match. The result must be filtered at a later stage.
224    NaturalJoin {
225        input1: RegisterIndex,
226        input2: RegisterIndex,
227        output: RegisterIndex,
228    },
229}
230
231// TODO: implement these features in the vm and use the SQL statements here to test them.
232// #[cfg(test)]
233// mod test {
234//     use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType};
235
236//     use crate::{
237//         expr::{BinOp, Expr, UnOp},
238//         table::TABLE_UNIQUE_KEY_NAME,
239//         value,
240//         vm::RegisterIndex,
241//     };
242
243//     use super::{Instruction::*, IntermediateCode};
244
245//     #[test]
246//     fn alter_statements() {
247//         // `ALTER TABLE table1 ADD COLUMN col4 STRING NULL`
248//         let table_reg_index = RegisterIndex::default();
249//         let col_index = table_reg_index.next_index();
250//         let _ = IntermediateCode {
251//             instrs: vec![
252//                 Source {
253//                     index: table_reg_index,
254//                     name: "table1".into(),
255//                 },
256//                 ColumnDef {
257//                     index: col_index,
258//                     name: "col4".into(),
259//                     data_type: DataType::String,
260//                 },
261//                 AddColumnOption {
262//                     index: col_index,
263//                     option: ColumnOptionDef {
264//                         name: None,
265//                         option: ColumnOption::Null,
266//                     },
267//                 },
268//                 AddColumn {
269//                     table_reg_index,
270//                     col_index,
271//                 },
272//             ],
273//         };
274
275//         // `ALTER TABLE table1 RENAME COLUMN col4 col5`
276//         let table_reg_index = RegisterIndex::default();
277//         let _ = IntermediateCode {
278//             instrs: vec![
279//                 Source {
280//                     index: table_reg_index,
281//                     name: "table1".into(),
282//                 },
283//                 RenameColumn {
284//                     index: table_reg_index,
285//                     old_name: "col4".into(),
286//                     new_name: "col5".into(),
287//                 },
288//             ],
289//         };
290
291//         // `ALTER TABLE table1 DROP COLUMN col5`
292//         let table_reg_index = RegisterIndex::default();
293//         let _ = IntermediateCode {
294//             instrs: vec![
295//                 Source {
296//                     index: table_reg_index,
297//                     name: "table1".into(),
298//                 },
299//                 RemoveColumn {
300//                     index: table_reg_index,
301//                     col_name: "col5".into(),
302//                 },
303//             ],
304//         };
305//     }
306
307//     #[test]
308//     fn select_with_joins() {
309//         // `SELECT col1, col2, col5 FROM table1 INNER JOIN table2 ON table1.col2 = table2.col3`
310//         let table_reg_index = RegisterIndex::default();
311//         let table_reg_index_2 = table_reg_index.next_index();
312//         let table_reg_index_3 = table_reg_index_2.next_index();
313//         let table_reg_index_4 = table_reg_index_3.next_index();
314//         let _ = IntermediateCode {
315//             instrs: vec![
316//                 Source {
317//                     index: table_reg_index,
318//                     name: "table1".into(),
319//                 },
320//                 Source {
321//                     index: table_reg_index_2,
322//                     name: "table2".into(),
323//                 },
324//                 CrossJoin {
325//                     input1: table_reg_index,
326//                     input2: table_reg_index_2,
327//                     output: table_reg_index_3,
328//                 },
329//                 Filter {
330//                     index: table_reg_index_3,
331//                     expr: Expr::Binary {
332//                         left: Box::new(Expr::ColumnRef("table1.col2".into())),
333//                         op: BinOp::Equal,
334//                         right: Box::new(Expr::ColumnRef("table2.col3".into())),
335//                     },
336//                 },
337//                 // Inner join, so remove NULLs
338//                 Filter {
339//                     index: table_reg_index_3,
340//                     expr: Expr::Unary {
341//                         op: UnOp::IsNotNull,
342//                         operand: Box::new(Expr::ColumnRef(
343//                             format!("table1.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
344//                         )),
345//                     },
346//                 },
347//                 // Inner join, so remove NULLs
348//                 Filter {
349//                     index: table_reg_index_3,
350//                     expr: Expr::Unary {
351//                         op: UnOp::IsNotNull,
352//                         operand: Box::new(Expr::ColumnRef(
353//                             format!("table2.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
354//                         )),
355//                     },
356//                 },
357//                 Empty {
358//                     index: table_reg_index_4,
359//                 },
360//                 Project {
361//                     input: table_reg_index_3,
362//                     output: table_reg_index_4,
363//                     expr: Expr::ColumnRef("col1".into()),
364//                     alias: None,
365//                 },
366//                 Project {
367//                     input: table_reg_index_3,
368//                     output: table_reg_index_4,
369//                     expr: Expr::ColumnRef("col2".into()),
370//                     alias: None,
371//                 },
372//                 Project {
373//                     input: table_reg_index_3,
374//                     output: table_reg_index_4,
375//                     expr: Expr::ColumnRef("col5".into()),
376//                     alias: None,
377//                 },
378//                 Return {
379//                     index: table_reg_index_4,
380//                 },
381//             ],
382//         };
383
384//         // `SELECT col1, col2, col5 FROM table1, table2`
385//         let table_reg_index = RegisterIndex::default();
386//         let table_reg_index_2 = table_reg_index.next_index();
387//         let table_reg_index_3 = table_reg_index_2.next_index();
388//         let table_reg_index_4 = table_reg_index_3.next_index();
389//         let _ = IntermediateCode {
390//             instrs: vec![
391//                 Source {
392//                     index: table_reg_index,
393//                     name: "table1".into(),
394//                 },
395//                 Source {
396//                     index: table_reg_index_2,
397//                     name: "table2".into(),
398//                 },
399//                 CrossJoin {
400//                     input1: table_reg_index,
401//                     input2: table_reg_index_2,
402//                     output: table_reg_index_3,
403//                 },
404//                 Empty {
405//                     index: table_reg_index_4,
406//                 },
407//                 Project {
408//                     input: table_reg_index_3,
409//                     output: table_reg_index_4,
410//                     expr: Expr::ColumnRef("col1".into()),
411//                     alias: None,
412//                 },
413//                 Project {
414//                     input: table_reg_index_3,
415//                     output: table_reg_index_4,
416//                     expr: Expr::ColumnRef("col2".into()),
417//                     alias: None,
418//                 },
419//                 Project {
420//                     input: table_reg_index_3,
421//                     output: table_reg_index_4,
422//                     expr: Expr::ColumnRef("col5".into()),
423//                     alias: None,
424//                 },
425//                 Return {
426//                     index: table_reg_index_4,
427//                 },
428//             ],
429//         };
430
431//         // `SELECT col1, col2, col5 FROM table1 NATURAL JOIN table2`
432//         let table_reg_index = RegisterIndex::default();
433//         let table_reg_index_2 = table_reg_index.next_index();
434//         let table_reg_index_3 = table_reg_index_2.next_index();
435//         let table_reg_index_4 = table_reg_index_3.next_index();
436//         let _ = IntermediateCode {
437//             instrs: vec![
438//                 Source {
439//                     index: table_reg_index,
440//                     name: "table1".into(),
441//                 },
442//                 Source {
443//                     index: table_reg_index_2,
444//                     name: "table2".into(),
445//                 },
446//                 NaturalJoin {
447//                     input1: table_reg_index,
448//                     input2: table_reg_index_2,
449//                     output: table_reg_index_3,
450//                 },
451//                 // Not an outer join, so remove NULLs
452//                 Filter {
453//                     index: table_reg_index_3,
454//                     expr: Expr::Unary {
455//                         op: UnOp::IsNotNull,
456//                         operand: Box::new(Expr::ColumnRef(
457//                             format!("table1.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
458//                         )),
459//                     },
460//                 },
461//                 // Not an outer join, so remove NULLs
462//                 Filter {
463//                     index: table_reg_index_3,
464//                     expr: Expr::Unary {
465//                         op: UnOp::IsNotNull,
466//                         operand: Box::new(Expr::ColumnRef(
467//                             format!("table2.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
468//                         )),
469//                     },
470//                 },
471//                 Empty {
472//                     index: table_reg_index_4,
473//                 },
474//                 Project {
475//                     input: table_reg_index_3,
476//                     output: table_reg_index_4,
477//                     expr: Expr::ColumnRef("col1".into()),
478//                     alias: None,
479//                 },
480//                 Project {
481//                     input: table_reg_index_3,
482//                     output: table_reg_index_4,
483//                     expr: Expr::ColumnRef("col2".into()),
484//                     alias: None,
485//                 },
486//                 Project {
487//                     input: table_reg_index_3,
488//                     output: table_reg_index_4,
489//                     expr: Expr::ColumnRef("col5".into()),
490//                     alias: None,
491//                 },
492//                 Return {
493//                     index: table_reg_index_4,
494//                 },
495//             ],
496//         };
497
498//         // `SELECT col1, col2, col5 FROM table1 LEFT OUTER JOIN table2 ON table1.col2 = table2.col3`
499//         let table_reg_index = RegisterIndex::default();
500//         let table_reg_index_2 = table_reg_index.next_index();
501//         let table_reg_index_3 = table_reg_index_2.next_index();
502//         let table_reg_index_4 = table_reg_index_3.next_index();
503//         let _ = IntermediateCode {
504//             instrs: vec![
505//                 Source {
506//                     index: table_reg_index,
507//                     name: "table1".into(),
508//                 },
509//                 Source {
510//                     index: table_reg_index_2,
511//                     name: "table2".into(),
512//                 },
513//                 CrossJoin {
514//                     input1: table_reg_index,
515//                     input2: table_reg_index_2,
516//                     output: table_reg_index_3,
517//                 },
518//                 Filter {
519//                     index: table_reg_index_3,
520//                     expr: Expr::Binary {
521//                         left: Box::new(Expr::ColumnRef("table1.col2".into())),
522//                         op: BinOp::Equal,
523//                         right: Box::new(Expr::ColumnRef("table2.col3".into())),
524//                     },
525//                 },
526//                 // Left outer join, so don't remove NULLs from first table
527//                 // Left outer join, so remove NULLs from second table
528//                 Filter {
529//                     index: table_reg_index_3,
530//                     expr: Expr::Unary {
531//                         op: UnOp::IsNotNull,
532//                         operand: Box::new(Expr::ColumnRef(
533//                             format!("table2.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
534//                         )),
535//                     },
536//                 },
537//                 Empty {
538//                     index: table_reg_index_4,
539//                 },
540//                 Project {
541//                     input: table_reg_index_3,
542//                     output: table_reg_index_4,
543//                     expr: Expr::ColumnRef("col1".into()),
544//                     alias: None,
545//                 },
546//                 Project {
547//                     input: table_reg_index_3,
548//                     output: table_reg_index_4,
549//                     expr: Expr::ColumnRef("col2".into()),
550//                     alias: None,
551//                 },
552//                 Project {
553//                     input: table_reg_index_3,
554//                     output: table_reg_index_4,
555//                     expr: Expr::ColumnRef("col5".into()),
556//                     alias: None,
557//                 },
558//                 Return {
559//                     index: table_reg_index_4,
560//                 },
561//             ],
562//         };
563//     }
564
565//     #[test]
566//     fn update_statements() {
567//         // TODO
568//         // `UPDATE table1 SET col2 = 'bar' WHERE col1 = 1`
569
570//         // `UPDATE table1 SET col2 = 'bar' WHERE col1 = 1 AND col3 = 2`
571
572//         // `UPDATE table1 SET col2 = 'bar', col3 = 4 WHERE col1 = 1 AND col3 = 2`
573
574//         // `UPDATE table1 SET col2 = 'bar' WHERE col1 = 1 OR col3 = 2`
575
576//         // `UPDATE table1 SET col3 = col3 + 1 WHERE col2 = 'foo'`
577
578//         // `UPDATE table1, table2 SET table1.col3 = table1.col3 + 1 WHERE table2.col2 = 'foo'`
579//     }
580// }