lialoonk_sql_query_parser/
lib.rs

1use pest::Parser;
2use pest_derive::Parser;
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6pub use pest::iterators::Pairs;
7
8/// Main SQL parser struct using pest grammar
9#[derive(Parser)]
10#[grammar = "grammar/grammar.pest"]
11pub struct SqlParser;
12
13/// Metadata extracted from SQL query parsing containing tables, columns, functions, etc.
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
15pub struct QueryMetadata {
16    /// Set of table names referenced in the query
17    pub tables: HashSet<String>,
18    /// Set of column names referenced in the query
19    pub columns: HashSet<String>,
20    /// Map of table/column aliases (alias -> original name)
21    pub aliases: HashMap<String, String>,
22    /// Set of function names used in the query
23    pub functions: HashSet<String>,
24    /// Set of aggregate function names (SUM, COUNT, AVG, etc.)
25    pub aggregates: HashSet<String>,
26    /// List of JOIN operations with their details
27    pub joins: Vec<JoinInfo>,
28}
29
30/// Information about a JOIN operation in the query
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
32pub struct JoinInfo {
33    /// Type of JOIN (INNER, LEFT, RIGHT, FULL, etc.)
34    pub join_type: Option<String>,
35    /// Name of the joined table
36    pub table: String,
37    /// Optional alias for the joined table
38    pub alias: Option<String>,
39    /// ON condition for the JOIN
40    pub condition: String,
41}
42
43/// Parse SQL query and return the parse tree
44///
45/// # Arguments
46/// * `input` - SQL query string to parse
47///
48/// # Returns
49/// Parse tree pairs on success, or parsing error
50#[allow(clippy::result_large_err)]
51pub fn parse_sql(
52    input: &str,
53) -> Result<pest::iterators::Pairs<'_, Rule>, pest::error::Error<Rule>> {
54    SqlParser::parse(Rule::sql, input)
55}
56
57/// Analyze SQL query and extract metadata (tables, columns, functions, etc.)
58///
59/// # Arguments
60/// * `input` - SQL query string to analyze
61///
62/// # Returns
63/// QueryMetadata struct with extracted information, or parsing error
64#[allow(clippy::result_large_err)]
65pub fn analyze_sql(input: &str) -> Result<QueryMetadata, pest::error::Error<Rule>> {
66    let pairs = SqlParser::parse(Rule::sql, input)?;
67    let mut metadata = QueryMetadata::default();
68
69    analyze_pairs(pairs, &mut metadata);
70
71    Ok(metadata)
72}
73
74/// Analyze SQL query and return metadata as pretty-printed JSON
75///
76/// # Arguments
77/// * `input` - SQL query string to analyze
78///
79/// # Returns
80/// JSON string with query metadata, or parsing/serialization error
81#[allow(clippy::result_large_err)]
82pub fn analyze_sql_json(input: &str) -> Result<String, pest::error::Error<Rule>> {
83    let metadata = analyze_sql(input)?;
84    let json = serde_json::to_string_pretty(&metadata).map_err(|e| {
85        pest::error::Error::new_from_span(
86            pest::error::ErrorVariant::CustomError {
87                message: format!("JSON serialization error: {}", e),
88            },
89            pest::Span::new(input, 0, input.len()).unwrap(),
90        )
91    })?;
92    Ok(json)
93}
94
95/// Recursively analyze parse tree pairs and extract metadata
96fn analyze_pairs(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
97    for pair in pairs {
98        match pair.as_rule() {
99            Rule::statement => analyze_pairs(pair.into_inner(), metadata),
100            Rule::select_stmt => analyze_select_stmt(pair.into_inner(), metadata),
101            Rule::insert_stmt => analyze_insert_stmt(pair.into_inner(), metadata),
102            Rule::update_stmt => analyze_update_stmt(pair.into_inner(), metadata),
103            Rule::delete_stmt => analyze_delete_stmt(pair.into_inner(), metadata),
104            _ => analyze_pairs(pair.into_inner(), metadata),
105        }
106    }
107}
108
109/// Analyze SELECT statement components
110fn analyze_select_stmt(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
111    for pair in pairs {
112        match pair.as_rule() {
113            Rule::from_item => analyze_from_item(pair.into_inner(), metadata),
114            Rule::join_clause => analyze_join_clause(pair.into_inner(), metadata),
115            Rule::projection => analyze_projection(pair.into_inner(), metadata),
116            Rule::where_clause => analyze_where_clause(pair.into_inner(), metadata),
117            _ => analyze_pairs(pair.into_inner(), metadata),
118        }
119    }
120}
121
122/// Analyze FROM clause items
123fn analyze_from_item(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
124    for pair in pairs {
125        if let Rule::table_factor = pair.as_rule() {
126            analyze_table_factor(pair.into_inner(), metadata);
127        }
128    }
129}
130
131/// Analyze table references and their aliases
132fn analyze_table_factor(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
133    let mut table_name = None;
134    let mut alias = None;
135
136    for pair in pairs {
137        match pair.as_rule() {
138            Rule::identifier if table_name.is_none() => {
139                table_name = Some(pair.as_str().to_string());
140            }
141            Rule::identifier => {
142                alias = Some(pair.as_str().to_string());
143            }
144            Rule::alias_identifier => {
145                alias = Some(pair.as_str().to_string());
146            }
147            _ => analyze_pairs(pair.into_inner(), metadata),
148        }
149    }
150
151    if let Some(table) = table_name {
152        metadata.tables.insert(table.clone());
153        if let Some(alias_name) = alias {
154            metadata.aliases.insert(alias_name, table);
155        }
156    }
157}
158
159/// Analyze JOIN clauses and extract join information
160fn analyze_join_clause(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
161    let mut join_type = None;
162    let mut table = None;
163    let mut alias = None;
164    let mut condition = String::new();
165
166    for pair in pairs {
167        match pair.as_rule() {
168            Rule::JOIN_TYPE => join_type = Some(pair.as_str().to_string()),
169            Rule::table_factor => {
170                for inner_pair in pair.into_inner() {
171                    match inner_pair.as_rule() {
172                        Rule::identifier if table.is_none() => {
173                            table = Some(inner_pair.as_str().to_string());
174                        }
175                        Rule::identifier => {
176                            alias = Some(inner_pair.as_str().to_string());
177                        }
178                        Rule::alias_identifier => {
179                            alias = Some(inner_pair.as_str().to_string());
180                        }
181                        _ => analyze_pairs(inner_pair.into_inner(), metadata),
182                    }
183                }
184            }
185            Rule::ON_KEY => {}
186            _ => {
187                condition = pair.as_str().to_string();
188                analyze_expression_for_metadata(pair.into_inner(), metadata);
189            }
190        }
191    }
192
193    if let Some(table_name) = table {
194        if let Some(alias_name) = alias.clone() {
195            metadata.aliases.insert(alias_name, table_name.clone());
196        }
197
198        metadata.joins.push(JoinInfo {
199            join_type,
200            table: table_name,
201            alias,
202            condition,
203        });
204    }
205}
206
207/// Analyze SELECT projection (column list or *)
208fn analyze_projection(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
209    for pair in pairs {
210        match pair.as_rule() {
211            Rule::projection_list => {
212                for item in pair.into_inner() {
213                    if let Rule::projection_item = item.as_rule() {
214                        analyze_projection_item(item.into_inner(), metadata);
215                    }
216                }
217            }
218            _ => analyze_pairs(pair.into_inner(), metadata),
219        }
220    }
221}
222
223/// Analyze individual projection items (columns, expressions)
224fn analyze_projection_item(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
225    for pair in pairs {
226        match pair.as_rule() {
227            Rule::expr => analyze_expression_for_metadata(pair.into_inner(), metadata),
228            _ => analyze_pairs(pair.into_inner(), metadata),
229        }
230    }
231}
232
233/// Analyze WHERE clause expressions
234fn analyze_where_clause(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
235    for pair in pairs {
236        if let Rule::expr = pair.as_rule() {
237            analyze_expression_for_metadata(pair.into_inner(), metadata);
238        }
239    }
240}
241
242/// Extract metadata from expressions (columns, functions, tables)
243fn analyze_expression_for_metadata(
244    pairs: pest::iterators::Pairs<Rule>,
245    metadata: &mut QueryMetadata,
246) {
247    for pair in pairs {
248        match pair.as_rule() {
249            Rule::column => {
250                metadata.columns.insert(pair.as_str().to_string());
251            }
252            Rule::function_call => {
253                let func_name = pair.as_str().split('(').next().unwrap_or("").to_string();
254                metadata.functions.insert(func_name.clone());
255
256                let aggregates = ["SUM", "COUNT", "AVG", "MIN", "MAX"];
257                if aggregates.contains(&func_name.to_uppercase().as_str()) {
258                    metadata.aggregates.insert(func_name);
259                }
260            }
261            Rule::identifier => {
262                if !metadata.aliases.contains_key(pair.as_str()) {
263                    metadata.tables.insert(pair.as_str().to_string());
264                }
265            }
266            _ => analyze_expression_for_metadata(pair.into_inner(), metadata),
267        }
268    }
269}
270
271/// Analyze INSERT statements
272fn analyze_insert_stmt(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
273    for pair in pairs {
274        match pair.as_rule() {
275            Rule::identifier => {
276                metadata.tables.insert(pair.as_str().to_string());
277            }
278            Rule::expr => {
279                analyze_expression_for_metadata(pair.into_inner(), metadata);
280            }
281            _ => analyze_pairs(pair.into_inner(), metadata),
282        }
283    }
284}
285
286/// Analyze UPDATE statements
287fn analyze_update_stmt(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
288    for pair in pairs {
289        match pair.as_rule() {
290            Rule::identifier => {
291                metadata.tables.insert(pair.as_str().to_string());
292            }
293            Rule::set_list => {
294                analyze_set_list(pair.into_inner(), metadata);
295            }
296            Rule::where_clause => {
297                analyze_where_clause(pair.into_inner(), metadata);
298            }
299            _ => analyze_pairs(pair.into_inner(), metadata),
300        }
301    }
302}
303
304/// Analyze DELETE statements
305fn analyze_delete_stmt(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
306    for pair in pairs {
307        match pair.as_rule() {
308            Rule::identifier => {
309                metadata.tables.insert(pair.as_str().to_string());
310            }
311            Rule::where_clause => {
312                analyze_where_clause(pair.into_inner(), metadata);
313            }
314            _ => analyze_pairs(pair.into_inner(), metadata),
315        }
316    }
317}
318
319/// Analyze SET clause in UPDATE statements
320fn analyze_set_list(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
321    for pair in pairs {
322        if let Rule::set_item = pair.as_rule() {
323            analyze_set_item(pair.into_inner(), metadata);
324        }
325    }
326}
327
328/// Analyze individual SET items (column = value)
329fn analyze_set_item(pairs: pest::iterators::Pairs<Rule>, metadata: &mut QueryMetadata) {
330    for pair in pairs {
331        match pair.as_rule() {
332            Rule::identifier => {
333                metadata.columns.insert(pair.as_str().to_string());
334            }
335            Rule::expr => {
336                analyze_expression_for_metadata(pair.into_inner(), metadata);
337            }
338            _ => analyze_pairs(pair.into_inner(), metadata),
339        }
340    }
341}