Skip to main content

polyglot_sql/
lib.rs

1//! Polyglot Core - SQL parsing and dialect translation library
2//!
3//! This library provides the core functionality for parsing SQL statements,
4//! building an abstract syntax tree (AST), and generating SQL in different dialects.
5//!
6//! # Architecture
7//!
8//! The library follows a pipeline architecture:
9//! 1. **Tokenizer** - Converts SQL string to token stream
10//! 2. **Parser** - Builds AST from tokens
11//! 3. **Generator** - Converts AST back to SQL string
12//!
13//! Each stage can be customized per dialect.
14
15pub mod ast_transforms;
16pub mod builder;
17pub mod dialects;
18pub mod diff;
19pub mod error;
20pub mod expressions;
21pub mod generator;
22pub mod helper;
23pub mod lineage;
24pub mod optimizer;
25pub mod parser;
26pub mod planner;
27pub mod resolver;
28pub mod schema;
29pub mod scope;
30pub mod time;
31pub mod tokens;
32pub mod transforms;
33pub mod traversal;
34pub mod trie;
35
36pub use dialects::{Dialect, DialectType, CustomDialectBuilder, unregister_custom_dialect};
37pub use error::{Error, Result, ValidationError, ValidationResult, ValidationSeverity};
38pub use expressions::Expression;
39pub use generator::Generator;
40pub use helper::{
41    csv, find_new_name, is_date_unit, is_float, is_int, is_iso_date, is_iso_datetime,
42    merge_ranges, name_sequence, seq_get, split_num_words, tsort, while_changing, DATE_UNITS,
43};
44pub use parser::Parser;
45pub use resolver::{is_column_ambiguous, resolve_column, Resolver, ResolverError, ResolverResult};
46pub use schema::{ensure_schema, from_simple_map, normalize_name, MappingSchema, Schema, SchemaError};
47pub use scope::{
48    build_scope, find_all_in_scope, find_in_scope, traverse_scope, walk_in_scope, ColumnRef, Scope,
49    ScopeType, SourceInfo,
50};
51pub use time::{format_time, is_valid_timezone, subsecond_precision, TIMEZONES};
52pub use tokens::{Token, TokenType, Tokenizer};
53pub use traversal::{
54    contains_aggregate, contains_subquery, contains_window_function, find_ancestor, find_parent,
55    get_columns, get_tables, is_aggregate, is_column, is_function, is_literal, is_select,
56    is_subquery, is_window_function, transform, transform_map, BfsIter, DfsIter, ExpressionWalk,
57    ParentInfo, TreeContext,
58    // Extended type predicates
59    is_insert, is_update, is_delete, is_union, is_intersect, is_except,
60    is_boolean, is_null_literal, is_star, is_identifier, is_table,
61    is_eq, is_neq, is_lt, is_lte, is_gt, is_gte, is_like, is_ilike,
62    is_add, is_sub, is_mul, is_div, is_mod, is_concat,
63    is_and, is_or, is_not,
64    is_in, is_between, is_is_null, is_exists,
65    is_count, is_sum, is_avg, is_min_func, is_max_func, is_coalesce, is_null_if,
66    is_cast, is_try_cast, is_safe_cast, is_case,
67    is_from, is_join, is_where, is_group_by, is_having, is_order_by, is_limit, is_offset,
68    is_with, is_cte, is_alias, is_paren, is_ordered,
69    is_create_table, is_drop_table, is_alter_table, is_create_index, is_drop_index,
70    is_create_view, is_drop_view,
71    // Composite predicates
72    is_query, is_set_operation, is_comparison, is_arithmetic, is_logical, is_ddl,
73};
74pub use ast_transforms::{
75    add_select_columns, remove_select_columns, set_distinct,
76    add_where, remove_where,
77    set_limit, set_offset, remove_limit_offset,
78    rename_columns, rename_tables, qualify_columns,
79    replace_nodes, replace_by_type, remove_nodes,
80    get_column_names, get_table_names, get_identifiers, get_functions, get_literals,
81    get_subqueries, get_aggregate_functions, get_window_functions, node_count,
82};
83pub use trie::{new_trie, new_trie_from_keys, Trie, TrieResult};
84pub use optimizer::{annotate_types, TypeAnnotator, TypeCoercionClass};
85
86/// Transpile SQL from one dialect to another.
87///
88/// # Arguments
89/// * `sql` - The SQL string to transpile
90/// * `read` - The source dialect to parse with
91/// * `write` - The target dialect to generate
92///
93/// # Returns
94/// A vector of transpiled SQL statements
95///
96/// # Example
97/// ```
98/// use polyglot_sql::{transpile, DialectType};
99///
100/// let result = transpile(
101///     "SELECT EPOCH_MS(1618088028295)",
102///     DialectType::DuckDB,
103///     DialectType::Hive
104/// );
105/// ```
106pub fn transpile(sql: &str, read: DialectType, write: DialectType) -> Result<Vec<String>> {
107    let read_dialect = Dialect::get(read);
108    let write_dialect = Dialect::get(write);
109
110    let expressions = read_dialect.parse(sql)?;
111
112    expressions
113        .into_iter()
114        .map(|expr| {
115            let transformed = write_dialect.transform(expr)?;
116            write_dialect.generate(&transformed)
117        })
118        .collect()
119}
120
121/// Parse SQL into an AST.
122///
123/// # Arguments
124/// * `sql` - The SQL string to parse
125/// * `dialect` - The dialect to use for parsing
126///
127/// # Returns
128/// A vector of parsed expressions
129pub fn parse(sql: &str, dialect: DialectType) -> Result<Vec<Expression>> {
130    let d = Dialect::get(dialect);
131    d.parse(sql)
132}
133
134/// Parse a single SQL statement.
135///
136/// # Arguments
137/// * `sql` - The SQL string containing a single statement
138/// * `dialect` - The dialect to use for parsing
139///
140/// # Returns
141/// The parsed expression, or an error if multiple statements found
142pub fn parse_one(sql: &str, dialect: DialectType) -> Result<Expression> {
143    let mut expressions = parse(sql, dialect)?;
144
145    if expressions.len() != 1 {
146        return Err(Error::Parse(format!(
147            "Expected 1 statement, found {}",
148            expressions.len()
149        )));
150    }
151
152    Ok(expressions.remove(0))
153}
154
155/// Generate SQL from an AST.
156///
157/// # Arguments
158/// * `expression` - The expression to generate SQL from
159/// * `dialect` - The target dialect
160///
161/// # Returns
162/// The generated SQL string
163pub fn generate(expression: &Expression, dialect: DialectType) -> Result<String> {
164    let d = Dialect::get(dialect);
165    d.generate(expression)
166}
167
168/// Validate SQL syntax.
169///
170/// # Arguments
171/// * `sql` - The SQL string to validate
172/// * `dialect` - The dialect to use for validation
173///
174/// # Returns
175/// A validation result with any errors found
176pub fn validate(sql: &str, dialect: DialectType) -> ValidationResult {
177    let d = Dialect::get(dialect);
178    match d.parse(sql) {
179        Ok(_) => ValidationResult::success(),
180        Err(e) => {
181            let error = match &e {
182                Error::Syntax {
183                    message,
184                    line,
185                    column,
186                } => ValidationError::error(message.clone(), "E001").with_location(*line, *column),
187                Error::Tokenize {
188                    message,
189                    line,
190                    column,
191                } => ValidationError::error(message.clone(), "E002").with_location(*line, *column),
192                Error::Parse(msg) => ValidationError::error(msg.clone(), "E003"),
193                _ => ValidationError::error(e.to_string(), "E000"),
194            };
195            ValidationResult::with_errors(vec![error])
196        }
197    }
198}
199
200/// Transpile SQL from one dialect to another, using string dialect names.
201///
202/// This supports both built-in dialect names (e.g., "postgresql", "mysql") and
203/// custom dialects registered via [`CustomDialectBuilder`].
204///
205/// # Arguments
206/// * `sql` - The SQL string to transpile
207/// * `read` - The source dialect name
208/// * `write` - The target dialect name
209///
210/// # Returns
211/// A vector of transpiled SQL statements, or an error if a dialect name is unknown.
212pub fn transpile_by_name(sql: &str, read: &str, write: &str) -> Result<Vec<String>> {
213    let read_dialect = Dialect::get_by_name(read)
214        .ok_or_else(|| Error::parse(format!("Unknown dialect: {}", read)))?;
215    let write_dialect = Dialect::get_by_name(write)
216        .ok_or_else(|| Error::parse(format!("Unknown dialect: {}", write)))?;
217
218    let expressions = read_dialect.parse(sql)?;
219
220    expressions
221        .into_iter()
222        .map(|expr| {
223            let transformed = write_dialect.transform(expr)?;
224            write_dialect.generate(&transformed)
225        })
226        .collect()
227}
228
229/// Parse SQL into an AST using a string dialect name.
230///
231/// Supports both built-in and custom dialect names.
232pub fn parse_by_name(sql: &str, dialect: &str) -> Result<Vec<Expression>> {
233    let d = Dialect::get_by_name(dialect)
234        .ok_or_else(|| Error::parse(format!("Unknown dialect: {}", dialect)))?;
235    d.parse(sql)
236}
237
238/// Generate SQL from an AST using a string dialect name.
239///
240/// Supports both built-in and custom dialect names.
241pub fn generate_by_name(expression: &Expression, dialect: &str) -> Result<String> {
242    let d = Dialect::get_by_name(dialect)
243        .ok_or_else(|| Error::parse(format!("Unknown dialect: {}", dialect)))?;
244    d.generate(expression)
245}