Skip to main content

lutra_sql/
lib.rs

1//! SQL Abstract Syntax Tree (AST) types
2#![cfg_attr(not(feature = "std"), no_std)]
3
4#[cfg(not(feature = "std"))]
5extern crate alloc;
6
7mod display_utils;
8mod dml;
9mod query;
10mod string;
11
12#[cfg(not(feature = "std"))]
13use alloc::{
14    boxed::Box,
15    format,
16    string::{String, ToString},
17    vec::Vec,
18};
19
20use core::cmp::Ordering;
21use core::{
22    fmt::{self, Display},
23    hash,
24};
25
26use display_utils::{NewLine, SpaceOrNewline};
27
28pub use self::dml::{Assignment, AssignmentTarget, Delete, Insert, Update};
29pub use self::query::{
30    Copy, Cte, CteAsMaterialized, Distinct, ExprWithAlias, Join, JoinConstraint, JoinOperator,
31    LateralView, OrderBy, OrderByExpr, OrderByKind, OrderByOptions, Query, RelExpr, RelNamed,
32    Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, TableAlias, TableVersion,
33    Values, With,
34};
35
36pub use self::string::escape as escape_string;
37pub use display_utils::{DisplayCommaSeparated, Indent};
38
/// Displays the items of a slice one after another, writing a fixed separator
/// between consecutive items.
///
/// Nothing is written for an empty slice, and the separator never appears
/// before the first item or after the last one.
///
/// The `Display` requirement lives on the `Display` impl rather than on the
/// struct itself, so the type can be named without repeating the bound.
pub struct DisplaySeparated<'a, T> {
    /// Items to render, in order.
    slice: &'a [T],
    /// Text written between two consecutive items.
    sep: &'static str,
}

impl<T> fmt::Display for DisplaySeparated<'_, T>
where
    T: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut items = self.slice.iter();
        // Peel off the first item so the separator is only ever written
        // *between* two items.
        if let Some(first) = items.next() {
            first.fmt(f)?;
            for item in items {
                f.write_str(self.sep)?;
                item.fmt(f)?;
            }
        }
        Ok(())
    }
}
61
62pub fn display_separated<'a, T>(slice: &'a [T], sep: &'static str) -> DisplaySeparated<'a, T>
63where
64    T: fmt::Display,
65{
66    DisplaySeparated { slice, sep }
67}
68
69pub fn display_comma_separated<T>(slice: &[T]) -> DisplaySeparated<'_, T>
70where
71    T: fmt::Display,
72{
73    DisplaySeparated { slice, sep: ", " }
74}
75
/// An identifier, decomposed into its value or character data and the quote style.
///
/// Equality, hashing and ordering all take both fields into account. Ordering
/// compares `value` first and only falls back to `quote_style` to break ties.
#[derive(Debug, Clone)]
pub struct Ident {
    /// The value of the identifier without quotes.
    pub value: String,
    /// The starting quote if any. Valid quote characters are the single quote,
    /// double quote, backtick, and opening square bracket.
    pub quote_style: Option<char>,
}

impl PartialEq for Ident {
    fn eq(&self, other: &Self) -> bool {
        // Destructure without `..`: adding a field to `Ident` then fails to
        // compile here until this comparison has been revisited.
        let Ident { value, quote_style } = self;

        *value == other.value && *quote_style == other.quote_style
    }
}

impl core::hash::Hash for Ident {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        // Exhaustive destructuring, as in `eq`: the fields hashed here must be
        // exactly the fields that `eq` compares.
        let Ident { value, quote_style } = self;

        // Path-qualified calls, so this does not depend on the `Hash` trait
        // itself being imported alongside the `hash` module.
        hash::Hash::hash(value, state);
        hash::Hash::hash(quote_style, state);
    }
}

impl Eq for Ident {}

impl PartialOrd for Ident {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to the total order so the two can never disagree.
        Some(self.cmp(other))
    }
}

impl Ord for Ident {
    fn cmp(&self, other: &Self) -> Ordering {
        // Exhaustive destructuring on both sides, for the same reason as in `eq`.
        let Ident { value, quote_style } = self;
        let Ident {
            value: other_value,
            quote_style: other_quote_style,
        } = other;

        // `value` decides the order; `quote_style` only breaks ties.
        match value.cmp(other_value) {
            Ordering::Equal => quote_style.cmp(other_quote_style),
            unequal => unequal,
        }
    }
}
139
140impl Ident {
141    /// Create a new identifier with the given value and no quotes and an empty span.
142    pub fn new<S>(value: S) -> Self
143    where
144        S: Into<String>,
145    {
146        Ident {
147            value: value.into(),
148            quote_style: None,
149        }
150    }
151
152    /// Create a new quoted identifier with the given quote and value. This function
153    /// panics if the given quote is not a valid quote character.
154    pub fn with_quote_if_needed<S>(quote: char, value: S) -> Self
155    where
156        S: Into<String>,
157    {
158        let value = value.into();
159        let quote_style = if valid_ident_regex().is_match(&value) && !is_keyword(&value) {
160            None
161        } else {
162            Some(quote)
163        };
164        Ident { value, quote_style }
165    }
166}
167
168fn valid_ident_regex() -> &'static regex::Regex {
169    static VALID_IDENT: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
170    VALID_IDENT.get_or_init(|| {
171        // One of:
172        // - `*`
173        // - An ident starting with `a-z_\$` and containing other characters `a-z0-9_\$`
174        //
175        // We could replace this with pomsky (regex<>pomsky : sql<>prql)
176        // ^ ('*' | [ascii_lower '_$'] [ascii_lower ascii_digit '_$']* ) $
177        regex::Regex::new(r"^((\*)|(^[a-z_\$][a-z0-9_\$]*))$").unwrap()
178    })
179}
180
/// Reports whether `ident` is one of the words that
/// `Ident::with_quote_if_needed` always quotes.
///
/// The comparison is an exact, case-sensitive match against the list below.
fn is_keyword(ident: &str) -> bool {
    matches!(
        ident,
        "select"
            | "from"
            | "where"
            | "group"
            | "by"
            | "limit"
            | "offset"
            | "distinct"
            | "on"
            | "none"
            | "some"
            | "end"
            | "time"
    )
}
188
189impl From<&str> for Ident {
190    fn from(value: &str) -> Self {
191        Ident {
192            value: value.to_string(),
193            quote_style: None,
194        }
195    }
196}
197
198impl fmt::Display for Ident {
199    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200        match self.quote_style {
201            Some(q) if q == '"' || q == '\'' || q == '`' => {
202                let escaped = string::escape(&self.value, q);
203                write!(f, "{q}{escaped}{q}")
204            }
205            Some('[') => write!(f, "[{}]", self.value),
206            None => f.write_str(&self.value),
207            _ => panic!("unexpected quote style"),
208        }
209    }
210}
211
/// A name of a table, view, custom type, etc., possibly multi-part,
/// e.g. `db.schema.obj`.
///
/// The parts are kept in order; when displayed they are joined with `.`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct ObjectName(pub Vec<Ident>);
215
216impl From<Vec<Ident>> for ObjectName {
217    fn from(idents: Vec<Ident>) -> Self {
218        ObjectName(idents)
219    }
220}
221
222impl fmt::Display for ObjectName {
223    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
224        write!(f, "{}", display_separated(&self.0, "."))
225    }
226}
227
/// A WHEN clause in a CASE expression containing both
/// the condition and its corresponding result.
///
/// Displayed as `WHEN <condition> THEN` followed by the indented result.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct CaseWhen {
    /// Expression written after `WHEN`.
    pub condition: Expr,
    /// Expression written after `THEN`.
    pub result: Expr,
}
235
236impl fmt::Display for CaseWhen {
237    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
238        f.write_str("WHEN ")?;
239        self.condition.fmt(f)?;
240        f.write_str(" THEN")?;
241        SpaceOrNewline.fmt(f)?;
242        Indent(&self.result).fmt(f)?;
243        Ok(())
244    }
245}
246
/// An SQL expression of any type.
///
/// # Semantics / Type Checking
///
/// The parser does not distinguish between expressions of different types
/// (e.g. boolean vs string). The caller is responsible for detecting and
/// validating types as necessary (for example `WHERE 1` vs `SELECT 1=1`)
/// See the [README.md] for more details.
///
/// [README.md]: https://github.com/apache/datafusion-sqlparser-rs/blob/main/README.md#syntax-vs-semantics
///
/// # Equality and Hashing Does not Include Source Locations
///
/// The `Expr` type implements `PartialEq` and `Eq` based on the semantic value
/// of the expression (not bitwise comparison). This means that `Expr` instances
/// that are semantically equivalent but have different spans (locations in the
/// source tree) will compare as equal.
///
/// NOTE(review): the two sections above read as if inherited from the upstream
/// sqlparser documentation. `Ident` in this file carries no span, and the
/// traits here are all derived — confirm the text still applies.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub enum Expr {
    /// Direct SQL source, written to the output verbatim.
    Source(String),
    /// A single identifier.
    Identifier(Ident),
    /// A multi-part identifier; the parts are displayed joined by `.`.
    CompoundIdentifier(Vec<Ident>),
    /// A zero-based row index, displayed as
    /// `(ROW_NUMBER() OVER (ORDER BY <keys>)-1)::int4`.
    /// The `ORDER BY` part is left out when the key list is empty.
    IndexBy(Vec<Expr>),
    /// A `CASE ... END` expression.
    Case {
        /// Optional expression written directly after `CASE`.
        operand: Option<Box<Expr>>,
        /// The `WHEN ... THEN ...` branches, in order.
        cases: Vec<CaseWhen>,
        /// Optional expression for the `ELSE` branch.
        else_result: Option<Box<Expr>>,
    },
    /// A query used as an expression, displayed wrapped in parentheses.
    Subquery(Box<Query>),
}
278
279impl fmt::Display for Expr {
280    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
281        match self {
282            Expr::Source(s) => f.write_str(s),
283            Expr::Identifier(s) => write!(f, "{s}"),
284            Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")),
285
286            Expr::IndexBy(keys) => {
287                f.write_str("(ROW_NUMBER() OVER (")?;
288                if !keys.is_empty() {
289                    f.write_str("ORDER BY ")?;
290                    display_comma_separated(keys).fmt(f)?;
291                }
292                f.write_str(")-1)::int4")
293            }
294
295            Expr::Case {
296                operand,
297                cases,
298                else_result,
299            } => {
300                f.write_str("CASE")?;
301                if let Some(operand) = operand {
302                    f.write_str(" ")?;
303                    operand.fmt(f)?;
304                }
305                for case in cases {
306                    SpaceOrNewline.fmt(f)?;
307                    Indent(case).fmt(f)?;
308                }
309                if let Some(else_result) = else_result {
310                    SpaceOrNewline.fmt(f)?;
311                    Indent("ELSE").fmt(f)?;
312                    SpaceOrNewline.fmt(f)?;
313                    Indent(Indent(else_result)).fmt(f)?;
314                }
315                SpaceOrNewline.fmt(f)?;
316                f.write_str("END")
317            }
318            Expr::Subquery(s) => {
319                f.write_str("(")?;
320                SpaceOrNewline.fmt(f)?;
321                Indent(s).fmt(f)?;
322                SpaceOrNewline.fmt(f)?;
323                f.write_str(")")
324            }
325        }
326    }
327}