use databend_common_ast::parser::token::{TokenKind, Tokenizer};
/// Splits raw SQL text into individual statements.
///
/// The parser is configured once and can then be used either on a whole
/// script (`parse`) or incrementally, one input line at a time (`parse_line`).
pub struct SqlParser {
/// Character that terminates a statement (compared against each token's text).
delimiter: char,
/// When `false`, every non-comment input line is returned as its own
/// statement; when `true`, lines are accumulated until a terminator is seen.
multi_line: bool,
/// Enables recognising a backslash token followed by the identifier `G`
/// as a statement terminator.
is_repl: bool,
}
impl SqlParser {
/// Builds a parser from its three settings.
///
/// * `delimiter` - character that ends a statement.
/// * `multi_line` - accumulate lines until a terminator (`true`) or treat
///   each line as a complete statement (`false`).
/// * `is_repl` - additionally accept a backslash followed by `G` as a
///   terminator.
pub fn new(delimiter: char, multi_line: bool, is_repl: bool) -> Self {
    Self {
        delimiter,
        multi_line,
        is_repl,
    }
}
pub fn parse(&self, sql_text: &str) -> Vec<String> {
let mut queries = Vec::new();
let mut current_query = String::new();
for line in sql_text.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
if current_query.is_empty()
&& (line.starts_with('!')
|| line == "exit"
|| line == "quit"
|| line.to_uppercase().starts_with("PUT"))
{
queries.push(line.to_owned());
continue;
}
if !self.multi_line {
if line.starts_with("--") {
continue;
} else {
queries.push(line.to_owned());
continue;
}
}
if !current_query.is_empty() {
current_query.push('\n');
}
current_query.push_str(line);
let parsed = self.parse_statements(¤t_query);
for statement in parsed.statements {
queries.push(statement);
}
current_query = parsed.remaining;
}
if !current_query.is_empty() {
let trimmed = current_query.trim();
if !trimmed.is_empty() && trimmed != self.delimiter.to_string() {
queries.push(trimmed.to_string());
}
}
queries
}
/// Feeds one input line to the parser and returns any statements it completes.
///
/// * `line` - the new input line (used as given; it is not trimmed here).
/// * `query_buffer` - text of the statement still being assembled; it is
///   extended with `line` and then replaced by whatever remains unparsed.
/// * `err` - overwritten with the error text reported by `parse_statements`
///   (only on the multi-line path).
///
/// An empty line yields nothing. While `query_buffer` is empty, a line that
/// starts with `!`, equals `exit` or `quit`, or whose upper-cased form starts
/// with `PUT` is returned verbatim. In single-line mode a line starting with
/// `--` yields nothing and any other line is returned as-is.
pub fn parse_line(
    &self,
    line: &str,
    query_buffer: &mut String,
    err: &mut String,
) -> Vec<String> {
    if line.is_empty() {
        return Vec::new();
    }

    // Evaluated lazily so the upper-casing only happens at statement start.
    let is_client_command = || {
        line.starts_with('!')
            || matches!(line, "exit" | "quit")
            || line.to_uppercase().starts_with("PUT")
    };
    if query_buffer.is_empty() && is_client_command() {
        return vec![line.to_owned()];
    }

    if !self.multi_line {
        return if line.starts_with("--") {
            Vec::new()
        } else {
            vec![line.to_owned()]
        };
    }

    if !query_buffer.is_empty() {
        query_buffer.push('\n');
    }
    query_buffer.push_str(line);

    let outcome = self.parse_statements(query_buffer);
    *err = outcome.err;
    *query_buffer = outcome.remaining;
    outcome.statements
}
/// Returns the byte offset of the `/*` that opens a block comment which is
/// still unclosed at the end of `s`, or `None` if there is no such comment.
///
/// The scan skips `/*` sequences that appear inside single-quoted text,
/// double-quoted text, `$$ ... $$` text, or `--` line comments (which run to
/// the next `\n`). Inside single- or double-quoted text a backslash escapes
/// the following byte, so an escaped quote such as `\'` does not end the
/// literal. Block comments do not nest: the first `*/` closes the comment.
///
/// The returned offset always points at an ASCII `/`, so it is a valid
/// `str` slicing boundary.
fn unclosed_block_comment_start(s: &str) -> Option<usize> {
    // The scanner is always in exactly one of these contexts.
    #[derive(Clone, Copy)]
    enum State {
        Normal,
        SingleQuote,
        DoubleQuote,
        DollarQuote,
        LineComment,
        /// Holds the offset of the opening `/*`.
        BlockComment(usize),
    }

    let bytes = s.as_bytes();
    let mut state = State::Normal;
    let mut i = 0;
    while i < bytes.len() {
        let c = bytes[i];
        let next = bytes.get(i + 1).copied();
        // Two-byte markers (and escapes) consume both bytes at once.
        let mut step = 1;
        state = match (state, c, next) {
            (State::LineComment, b'\n', _) => State::Normal,
            (State::BlockComment(_), b'*', Some(b'/')) => {
                step = 2;
                State::Normal
            }
            (State::DollarQuote, b'$', Some(b'$')) => {
                step = 2;
                State::Normal
            }
            // Backslash escape inside a quoted literal: skip the escaped byte
            // so that `\'` / `\"` do not terminate the literal.
            (State::SingleQuote | State::DoubleQuote, b'\\', Some(_)) => {
                step = 2;
                state
            }
            (State::SingleQuote, b'\'', _) => State::Normal,
            (State::DoubleQuote, b'"', _) => State::Normal,
            (State::Normal, b'\'', _) => State::SingleQuote,
            (State::Normal, b'"', _) => State::DoubleQuote,
            (State::Normal, b'$', Some(b'$')) => {
                step = 2;
                State::DollarQuote
            }
            (State::Normal, b'-', Some(b'-')) => {
                step = 2;
                State::LineComment
            }
            (State::Normal, b'/', Some(b'*')) => {
                step = 2;
                State::BlockComment(i)
            }
            (unchanged, _, _) => unchanged,
        };
        i += step;
    }

    match state {
        State::BlockComment(open_pos) => Some(open_pos),
        _ => None,
    }
}
/// Extracts every complete statement from `query`.
///
/// Returns a `ParseResult` holding the completed statements, the text that
/// is still unterminated, and the text of the most recent tokenizer error
/// (empty if none occurred).
fn parse_statements(&self, query: &str) -> ParseResult {
// If `query` ends inside a block comment, only the text before the opening
// `/*` is tokenized; the comment tail is re-attached to the remainder below.
let (to_parse, comment_tail) = match Self::unclosed_block_comment_start(query) {
Some(pos) => (&query[..pos], &query[pos..]),
None => (query, ""),
};
let mut statements = Vec::new();
let mut remaining_query = to_parse.to_string();
let mut err = String::new();
// Each pass tokenizes the current remainder from its start. When a
// terminator is found the remainder is cut there and the loop restarts;
// the loop ends once a pass finds no terminator.
'Parser: loop {
// Cleared as soon as the tokenizer reports an error in this pass; a
// statement cut after such an error is discarded instead of emitted.
let mut is_valid = true;
let tokenizer = Tokenizer::new(&remaining_query);
let mut previous_token_backslash = false;
for token in tokenizer {
match token {
Ok(token) => {
let is_end_query = token.text() == self.delimiter.to_string();
// NOTE(review): `&&` binds tighter than `||`, so the `ends_with("\\G")`
// test below applies even when `is_repl` is false. Confirm whether
// `is_repl` was meant to guard both alternatives.
let is_slash_g = self.is_repl
&& (previous_token_backslash
&& token.kind == TokenKind::Ident
&& token.text() == "G")
|| (token.text().ends_with("\\G"));
if is_end_query || is_slash_g {
// Cut right after the terminator token.
let (sql, remain) = remaining_query.split_at(token.span.end as usize);
if is_valid
&& !sql.is_empty()
&& sql.trim() != self.delimiter.to_string()
{
// Only trailing delimiter characters are stripped here; a
// trailing `\G` stays part of the emitted statement.
let sql = sql.trim_end_matches(self.delimiter);
statements.push(sql.trim().to_string());
}
remaining_query = remain.to_string();
continue 'Parser;
}
previous_token_backslash = matches!(token.kind, TokenKind::Backslash);
}
Err(e) => {
// Remember the failure but keep scanning for a terminator.
is_valid = false;
err = e.to_string();
continue;
}
}
}
break;
}
if !comment_tail.is_empty() {
remaining_query.push_str(comment_tail);
}
ParseResult {
statements,
remaining: remaining_query,
err,
}
}
}
/// Outcome of one `SqlParser::parse_statements` call.
struct ParseResult {
/// Completed statements, trimmed and with trailing delimiter characters removed.
statements: Vec<String>,
/// Text left over after the last terminator, including the tail of any
/// unclosed block comment.
remaining: String,
/// Text of the most recent tokenizer error; empty when none occurred.
err: String,
}
/// Splits `sql_text` into statements with a fixed configuration:
/// `;` as the delimiter, multi-line mode on, REPL mode off.
pub fn parse_sql_for_web(sql_text: &str) -> Vec<String> {
    SqlParser::new(';', true, false).parse(sql_text)
}