otter_sql/ic.rs
1//! Intermediate representation (IR) and instruction set for an SQL database.
2
3use fmt_derive::{Debug, Display};
4
5use sqlparser::ast::{ColumnOptionDef, DataType};
6
7use crate::{
8 expr::Expr,
9 identifier::{SchemaRef, TableRef},
10 value::Value,
11 vm::RegisterIndex,
12 BoundedString,
13};
14
15#[derive(Debug, Clone)]
16/// The intermediate representation of a query. Made of up [`Instruction`]s.
17pub struct IntermediateCode {
18 pub instrs: Vec<Instruction>,
19}
20
21/// The instruction set of OtterSQL.
22#[derive(Display, Debug, Clone, PartialEq)]
23pub enum Instruction {
24 /// Load a [`Value`] into a register.
25 Value { index: RegisterIndex, value: Value },
26
27 /// Load a [`Expr`] into a register.
28 Expr { index: RegisterIndex, expr: Expr },
29
30 /// Load an *existing* table given by `name`.
31 ///
32 /// This will result in a [`Register::TableRef`](`crate::vm::Register::TableRef`) being stored at the
33 /// given register.
34 Source {
35 index: RegisterIndex,
36 name: TableRef,
37 },
38
39 /// Create a new empty [`Register::TableRef`](`crate::vm::Register::TableRef`).
40 Empty { index: RegisterIndex },
41
42 /// Create a new [`Register::TableRef](`crate::vm::Register::TableRef) pointing to a
43 /// non-existent table.
44 NonExistent { index: RegisterIndex },
45
46 /// Filter the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` using the given expression.
47 ///
48 /// This represents a `WHERE` clause of a `SELECT` statement in SQL.
49 Filter { index: RegisterIndex, expr: Expr },
50
51 /// Create a projection of the columns of the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `input`.
52 ///
53 /// The resultant column is added to the [`Register::TableRef`](`crate::vm::Register::TableRef`)
54 /// at `output`. It must be either an empty table or a table with the same number of rows.
55 ///
56 /// This represents the column list of the `SELECT` statement in SQL.
57 Project {
58 input: RegisterIndex,
59 output: RegisterIndex,
60 expr: Expr,
61 #[display(
62 "{}",
63 match alias {
64 None => "None".to_owned(),
65 Some(alias) => format!("{}", alias)
66 }
67 )]
68 alias: Option<BoundedString>,
69 },
70
71 /// Group the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` by the given expression.
72 ///
73 /// This will result in a [`Register::GroupedTable`](`crate::vm::Register::GroupedTable`) being stored at the `index` register.
74 ///
75 /// Must be added before any projections so as to catch errors in column selections.
76 GroupBy { index: RegisterIndex, expr: Expr },
77
78 /// Order the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` by the given expression.
79 ///
80 /// This represents the `ORDER BY` clause in SQL.
81 Order {
82 index: RegisterIndex,
83 expr: Expr,
84 ascending: bool,
85 },
86
87 /// Truncate the [`Register::TableRef`](`crate::vm::Register::TableRef`) at `index` to the given number of rows.
88 ///
89 /// This represents the `LIMIT` clause in SQL.
90 Limit { index: RegisterIndex, limit: u64 },
91
92 /// Return from register at `index`.
93 ///
94 /// Some values stored in a register may be intermediate values and cannot be returned.
95 /// See [`Register`](`crate::vm::Register`) for more information.
96 Return { index: RegisterIndex },
97
98 /// Create a new schema.
99 ///
100 /// This represents a `CREATE SCHEMA [IF NOT EXISTS]` statement.
101 NewSchema {
102 schema_name: SchemaRef,
103 /// If `true`, the schema is not created if it exists and no error is returned.
104 exists_ok: bool,
105 },
106
107 /// Start defining a new column and store the temporary metadata in register `index`.
108 ///
109 /// The value stored in the register will be of type [`Register::Column`](`crate::vm::Register::Column`).
110 ColumnDef {
111 index: RegisterIndex,
112 /// The column name.
113 name: BoundedString,
114 data_type: DataType,
115 },
116
117 /// Add an option or constraint to the [`Column`](`crate::vm::Register::Column`) definition in register `index`.
118 AddColumnOption {
119 index: RegisterIndex,
120 option: ColumnOptionDef,
121 },
122
123 /// Add column in register `col_index` to the [`Register::TableRef`](`crate::vm::Register::TableRef`) in `table_reg_index`.
124 AddColumn {
125 table_reg_index: RegisterIndex,
126 col_index: RegisterIndex,
127 },
128
129 /// Create table from the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
130 ///
131 /// Creation implies that the table is added to the schema.
132 ///
133 /// This represents a `CREATE TABLE [IF NOT EXISTS]` statement.
134 NewTable {
135 index: RegisterIndex,
136 name: TableRef,
137 /// If `true`, the table is not created if it exists and no error is returned.
138 exists_ok: bool,
139 },
140
141 /// Drop the table referenced by the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
142 DropTable { index: RegisterIndex },
143
144 /// Remove the given column from the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
145 RemoveColumn {
146 index: RegisterIndex,
147 col_name: BoundedString,
148 },
149
150 /// Rename an existing column from the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
151 RenameColumn {
152 index: RegisterIndex,
153 old_name: BoundedString,
154 new_name: BoundedString,
155 },
156
157 /// Start a new insertion into the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `view_index`.
158 ///
159 /// A [`Register::InsertDef`](`crate::vm::Register::InsertDef`) is stored in register `index`.
160 InsertDef {
161 table_reg_index: RegisterIndex,
162 index: RegisterIndex,
163 },
164
165 /// Add a column to the [`Register::InsertDef`](`crate::vm::Register::InsertDef`) in register `index`.
166 ColumnInsertDef {
167 insert_index: RegisterIndex,
168 col_name: BoundedString,
169 },
170
171 /// Start defining a new row of data to be inserted into the [`Register::InsertDef`](`crate::vm::Register::InsertDef`) in register `insert_index`.
172 ///
173 /// The value stored in the register `index` will be of type [`Register::InsertRow`](`crate::vm::Register::InsertRow`).
174 RowDef {
175 insert_index: RegisterIndex,
176 row_index: RegisterIndex,
177 },
178
179 /// Add a value to the [`Register::InsertRow`](`crate::vm::Register::InsertRow`) in register `index`.
180 AddValue {
181 row_index: RegisterIndex,
182 expr: Expr,
183 },
184
185 /// Perform insertion defined in the [`Register::InsertRow`](`crate::vm::Register::InsertRow`) in register `index`.
186 ///
187 /// This represents an `INSERT INTO` statement.
188 Insert { index: RegisterIndex },
189
190 /// Update values of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `index`.
191 ///
192 /// This represents an `UPDATE` statement.
193 Update {
194 index: RegisterIndex,
195 col: Expr,
196 expr: Expr,
197 },
198
199 /// Perform a union of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input1` and the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input2`.
200 ///
201 /// The output is stored as a [`Register::TableRef`](`crate::vm::Register::TableRef`) in register
202 /// `output`.
203 Union {
204 input1: RegisterIndex,
205 input2: RegisterIndex,
206 output: RegisterIndex,
207 },
208
209 /// Perform a cartesian join of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input1` and the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input2`.
210 ///
211 /// The output is stored as a [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `output`.
212 CrossJoin {
213 input1: RegisterIndex,
214 input2: RegisterIndex,
215 output: RegisterIndex,
216 },
217
218 /// Perform a natural join of the [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `input1` and the [`Register::TableRef](`crate::vm::Register::TableRef) in register `input2`.
219 ///
220 /// The output is stored as a [`Register::TableRef`](`crate::vm::Register::TableRef`) in register `output`.
221 ///
222 /// Note: this is both a left and a right join i.e., there will be `NULL`s where the common
223 /// columns do not match. The result must be filtered at a later stage.
224 NaturalJoin {
225 input1: RegisterIndex,
226 input2: RegisterIndex,
227 output: RegisterIndex,
228 },
229}
230
231// TODO: implement these features in the vm and use the SQL statements here to test them.
232// #[cfg(test)]
233// mod test {
234// use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType};
235
236// use crate::{
237// expr::{BinOp, Expr, UnOp},
238// table::TABLE_UNIQUE_KEY_NAME,
239// value,
240// vm::RegisterIndex,
241// };
242
243// use super::{Instruction::*, IntermediateCode};
244
245// #[test]
246// fn alter_statements() {
247// // `ALTER TABLE table1 ADD COLUMN col4 STRING NULL`
248// let table_reg_index = RegisterIndex::default();
249// let col_index = table_reg_index.next_index();
250// let _ = IntermediateCode {
251// instrs: vec![
252// Source {
253// index: table_reg_index,
254// name: "table1".into(),
255// },
256// ColumnDef {
257// index: col_index,
258// name: "col4".into(),
259// data_type: DataType::String,
260// },
261// AddColumnOption {
262// index: col_index,
263// option: ColumnOptionDef {
264// name: None,
265// option: ColumnOption::Null,
266// },
267// },
268// AddColumn {
269// table_reg_index,
270// col_index,
271// },
272// ],
273// };
274
275// // `ALTER TABLE table1 RENAME COLUMN col4 col5`
276// let table_reg_index = RegisterIndex::default();
277// let _ = IntermediateCode {
278// instrs: vec![
279// Source {
280// index: table_reg_index,
281// name: "table1".into(),
282// },
283// RenameColumn {
284// index: table_reg_index,
285// old_name: "col4".into(),
286// new_name: "col5".into(),
287// },
288// ],
289// };
290
291// // `ALTER TABLE table1 DROP COLUMN col5`
292// let table_reg_index = RegisterIndex::default();
293// let _ = IntermediateCode {
294// instrs: vec![
295// Source {
296// index: table_reg_index,
297// name: "table1".into(),
298// },
299// RemoveColumn {
300// index: table_reg_index,
301// col_name: "col5".into(),
302// },
303// ],
304// };
305// }
306
307// #[test]
308// fn select_with_joins() {
309// // `SELECT col1, col2, col5 FROM table1 INNER JOIN table2 ON table1.col2 = table2.col3`
310// let table_reg_index = RegisterIndex::default();
311// let table_reg_index_2 = table_reg_index.next_index();
312// let table_reg_index_3 = table_reg_index_2.next_index();
313// let table_reg_index_4 = table_reg_index_3.next_index();
314// let _ = IntermediateCode {
315// instrs: vec![
316// Source {
317// index: table_reg_index,
318// name: "table1".into(),
319// },
320// Source {
321// index: table_reg_index_2,
322// name: "table2".into(),
323// },
324// CrossJoin {
325// input1: table_reg_index,
326// input2: table_reg_index_2,
327// output: table_reg_index_3,
328// },
329// Filter {
330// index: table_reg_index_3,
331// expr: Expr::Binary {
332// left: Box::new(Expr::ColumnRef("table1.col2".into())),
333// op: BinOp::Equal,
334// right: Box::new(Expr::ColumnRef("table2.col3".into())),
335// },
336// },
337// // Inner join, so remove NULLs
338// Filter {
339// index: table_reg_index_3,
340// expr: Expr::Unary {
341// op: UnOp::IsNotNull,
342// operand: Box::new(Expr::ColumnRef(
343// format!("table1.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
344// )),
345// },
346// },
347// // Inner join, so remove NULLs
348// Filter {
349// index: table_reg_index_3,
350// expr: Expr::Unary {
351// op: UnOp::IsNotNull,
352// operand: Box::new(Expr::ColumnRef(
353// format!("table2.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
354// )),
355// },
356// },
357// Empty {
358// index: table_reg_index_4,
359// },
360// Project {
361// input: table_reg_index_3,
362// output: table_reg_index_4,
363// expr: Expr::ColumnRef("col1".into()),
364// alias: None,
365// },
366// Project {
367// input: table_reg_index_3,
368// output: table_reg_index_4,
369// expr: Expr::ColumnRef("col2".into()),
370// alias: None,
371// },
372// Project {
373// input: table_reg_index_3,
374// output: table_reg_index_4,
375// expr: Expr::ColumnRef("col5".into()),
376// alias: None,
377// },
378// Return {
379// index: table_reg_index_4,
380// },
381// ],
382// };
383
384// // `SELECT col1, col2, col5 FROM table1, table2`
385// let table_reg_index = RegisterIndex::default();
386// let table_reg_index_2 = table_reg_index.next_index();
387// let table_reg_index_3 = table_reg_index_2.next_index();
388// let table_reg_index_4 = table_reg_index_3.next_index();
389// let _ = IntermediateCode {
390// instrs: vec![
391// Source {
392// index: table_reg_index,
393// name: "table1".into(),
394// },
395// Source {
396// index: table_reg_index_2,
397// name: "table2".into(),
398// },
399// CrossJoin {
400// input1: table_reg_index,
401// input2: table_reg_index_2,
402// output: table_reg_index_3,
403// },
404// Empty {
405// index: table_reg_index_4,
406// },
407// Project {
408// input: table_reg_index_3,
409// output: table_reg_index_4,
410// expr: Expr::ColumnRef("col1".into()),
411// alias: None,
412// },
413// Project {
414// input: table_reg_index_3,
415// output: table_reg_index_4,
416// expr: Expr::ColumnRef("col2".into()),
417// alias: None,
418// },
419// Project {
420// input: table_reg_index_3,
421// output: table_reg_index_4,
422// expr: Expr::ColumnRef("col5".into()),
423// alias: None,
424// },
425// Return {
426// index: table_reg_index_4,
427// },
428// ],
429// };
430
431// // `SELECT col1, col2, col5 FROM table1 NATURAL JOIN table2`
432// let table_reg_index = RegisterIndex::default();
433// let table_reg_index_2 = table_reg_index.next_index();
434// let table_reg_index_3 = table_reg_index_2.next_index();
435// let table_reg_index_4 = table_reg_index_3.next_index();
436// let _ = IntermediateCode {
437// instrs: vec![
438// Source {
439// index: table_reg_index,
440// name: "table1".into(),
441// },
442// Source {
443// index: table_reg_index_2,
444// name: "table2".into(),
445// },
446// NaturalJoin {
447// input1: table_reg_index,
448// input2: table_reg_index_2,
449// output: table_reg_index_3,
450// },
451// // Not an outer join, so remove NULLs
452// Filter {
453// index: table_reg_index_3,
454// expr: Expr::Unary {
455// op: UnOp::IsNotNull,
456// operand: Box::new(Expr::ColumnRef(
457// format!("table1.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
458// )),
459// },
460// },
461// // Not an outer join, so remove NULLs
462// Filter {
463// index: table_reg_index_3,
464// expr: Expr::Unary {
465// op: UnOp::IsNotNull,
466// operand: Box::new(Expr::ColumnRef(
467// format!("table2.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
468// )),
469// },
470// },
471// Empty {
472// index: table_reg_index_4,
473// },
474// Project {
475// input: table_reg_index_3,
476// output: table_reg_index_4,
477// expr: Expr::ColumnRef("col1".into()),
478// alias: None,
479// },
480// Project {
481// input: table_reg_index_3,
482// output: table_reg_index_4,
483// expr: Expr::ColumnRef("col2".into()),
484// alias: None,
485// },
486// Project {
487// input: table_reg_index_3,
488// output: table_reg_index_4,
489// expr: Expr::ColumnRef("col5".into()),
490// alias: None,
491// },
492// Return {
493// index: table_reg_index_4,
494// },
495// ],
496// };
497
498// // `SELECT col1, col2, col5 FROM table1 LEFT OUTER JOIN table2 ON table1.col2 = table2.col3`
499// let table_reg_index = RegisterIndex::default();
500// let table_reg_index_2 = table_reg_index.next_index();
501// let table_reg_index_3 = table_reg_index_2.next_index();
502// let table_reg_index_4 = table_reg_index_3.next_index();
503// let _ = IntermediateCode {
504// instrs: vec![
505// Source {
506// index: table_reg_index,
507// name: "table1".into(),
508// },
509// Source {
510// index: table_reg_index_2,
511// name: "table2".into(),
512// },
513// CrossJoin {
514// input1: table_reg_index,
515// input2: table_reg_index_2,
516// output: table_reg_index_3,
517// },
518// Filter {
519// index: table_reg_index_3,
520// expr: Expr::Binary {
521// left: Box::new(Expr::ColumnRef("table1.col2".into())),
522// op: BinOp::Equal,
523// right: Box::new(Expr::ColumnRef("table2.col3".into())),
524// },
525// },
526// // Left outer join, so don't remove NULLs from first table
527// // Left outer join, so remove NULLs from second table
528// Filter {
529// index: table_reg_index_3,
530// expr: Expr::Unary {
531// op: UnOp::IsNotNull,
532// operand: Box::new(Expr::ColumnRef(
533// format!("table2.{}", TABLE_UNIQUE_KEY_NAME).as_str().into(),
534// )),
535// },
536// },
537// Empty {
538// index: table_reg_index_4,
539// },
540// Project {
541// input: table_reg_index_3,
542// output: table_reg_index_4,
543// expr: Expr::ColumnRef("col1".into()),
544// alias: None,
545// },
546// Project {
547// input: table_reg_index_3,
548// output: table_reg_index_4,
549// expr: Expr::ColumnRef("col2".into()),
550// alias: None,
551// },
552// Project {
553// input: table_reg_index_3,
554// output: table_reg_index_4,
555// expr: Expr::ColumnRef("col5".into()),
556// alias: None,
557// },
558// Return {
559// index: table_reg_index_4,
560// },
561// ],
562// };
563// }
564
565// #[test]
566// fn update_statements() {
567// // TODO
568// // `UPDATE table1 SET col2 = 'bar' WHERE col1 = 1`
569
570// // `UPDATE table1 SET col2 = 'bar' WHERE col1 = 1 AND col3 = 2`
571
572// // `UPDATE table1 SET col2 = 'bar', col3 = 4 WHERE col1 = 1 AND col3 = 2`
573
574// // `UPDATE table1 SET col2 = 'bar' WHERE col1 = 1 OR col3 = 2`
575
576// // `UPDATE table1 SET col3 = col3 + 1 WHERE col2 = 'foo'`
577
578// // `UPDATE table1, table2 SET table1.col3 = table1.col3 + 1 WHERE table2.col2 = 'foo'`
579// }
580// }