use regex::Regex;
use std::sync::OnceLock;
#[allow(clippy::expect_used)]
fn init_regex(pattern: &str) -> Regex {
Regex::new(pattern).expect("static regex pattern must be valid")
}
pub fn translate(sql: &str) -> String {
let mut result = sql.to_string();
result = translate_backslash_escapes(&result);
result = translate_backticks(&result);
result = translate_types(&result);
result = translate_auto_increment(&result);
result = translate_charset_collation(&result);
result = translate_on_duplicate_key(&result);
result = translate_replace_into(&result);
result = translate_insert_ignore(&result);
result = translate_limit_offset(&result);
result = translate_functions(&result);
result = translate_multi_table_delete(&result);
result = translate_key_indexes(&result);
result = translate_alter_table_keys(&result);
result = translate_misc(&result);
result
}
fn translate_backslash_escapes(sql: &str) -> String {
let mut out = String::with_capacity(sql.len());
let mut chars = sql.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '\'' {
out.push('\'');
process_string_interior(&mut out, &mut chars, '\'');
} else if ch == '"' && looks_like_mysql_string_context(&out) {
out.push('\'');
process_string_interior(&mut out, &mut chars, '"');
} else {
out.push(ch);
}
}
out
}
fn process_string_interior(
out: &mut String,
chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
quote_char: char,
) {
loop {
match chars.next() {
Some('\\') => {
match chars.peek() {
Some('"') => {
out.push('"');
chars.next();
}
Some('\\') => {
out.push('\\');
chars.next();
}
Some('n') => {
out.push('\n');
chars.next();
}
Some('r') => {
out.push('\r');
chars.next();
}
Some('t') => {
out.push('\t');
chars.next();
}
Some('0') => {
chars.next(); }
Some('\'') => {
out.push('\'');
out.push('\'');
chars.next();
}
_ => {
out.push('\\');
}
}
}
Some(c) if c == quote_char => {
if chars.peek() == Some("e_char) {
if quote_char == '\'' {
out.push('\'');
out.push('\'');
} else {
out.push('"');
}
chars.next();
} else {
out.push('\'');
break;
}
}
Some('\'') if quote_char == '"' => {
out.push('\'');
out.push('\'');
}
Some(c) => out.push(c),
None => break,
}
}
}
fn looks_like_mysql_string_context(out: &str) -> bool {
let trimmed = out.trim_end();
trimmed.ends_with('(')
|| trimmed.ends_with(',')
|| trimmed.ends_with('=')
|| trimmed.to_uppercase().ends_with("VALUES")
|| trimmed.to_uppercase().ends_with("SET")
|| trimmed.to_uppercase().ends_with("DEFAULT")
|| trimmed.to_uppercase().ends_with("LIKE")
|| trimmed.to_uppercase().ends_with("WHERE")
|| trimmed.to_uppercase().ends_with("AND")
|| trimmed.to_uppercase().ends_with("OR")
}
fn translate_backticks(sql: &str) -> String {
let mut out = String::with_capacity(sql.len());
let mut chars = sql.chars().peekable();
while let Some(ch) = chars.next() {
match ch {
'\'' => {
out.push('\'');
loop {
match chars.next() {
Some('\\') => {
out.push('\\');
if let Some(escaped) = chars.next() {
out.push(escaped);
}
}
Some('\'') => {
if chars.peek() == Some(&'\'') {
out.push('\'');
out.push('\'');
chars.next();
} else {
out.push('\'');
break;
}
}
Some(c) => out.push(c),
None => break,
}
}
}
'`' => {},
_ => out.push(ch),
}
}
out
}
fn translate_types(sql: &str) -> String {
let mut s = sql.to_string();
static TINYINT1_RE: OnceLock<Regex> = OnceLock::new();
let re = TINYINT1_RE.get_or_init(|| init_regex(r"(?i)\bTINYINT\s*\(\s*1\s*\)"));
s = re.replace_all(&s, "BOOLEAN").to_string();
static TINYINT_RE: OnceLock<Regex> = OnceLock::new();
let re = TINYINT_RE.get_or_init(|| init_regex(r"(?i)\bTINYINT\b"));
s = re.replace_all(&s, "SMALLINT").to_string();
static MEDIUMINT_RE: OnceLock<Regex> = OnceLock::new();
let re = MEDIUMINT_RE.get_or_init(|| init_regex(r"(?i)\bMEDIUMINT\b"));
s = re.replace_all(&s, "INTEGER").to_string();
static LONGTEXT_RE: OnceLock<Regex> = OnceLock::new();
let re = LONGTEXT_RE.get_or_init(|| init_regex(r"(?i)\bLONGTEXT\b"));
s = re.replace_all(&s, "TEXT").to_string();
static MEDIUMTEXT_RE: OnceLock<Regex> = OnceLock::new();
let re = MEDIUMTEXT_RE.get_or_init(|| init_regex(r"(?i)\bMEDIUMTEXT\b"));
s = re.replace_all(&s, "TEXT").to_string();
static TINYTEXT_RE: OnceLock<Regex> = OnceLock::new();
let re = TINYTEXT_RE.get_or_init(|| init_regex(r"(?i)\bTINYTEXT\b"));
s = re.replace_all(&s, "TEXT").to_string();
static LONGBLOB_RE: OnceLock<Regex> = OnceLock::new();
let re = LONGBLOB_RE.get_or_init(|| init_regex(r"(?i)\bLONGBLOB\b"));
s = re.replace_all(&s, "BYTEA").to_string();
static MEDIUMBLOB_RE: OnceLock<Regex> = OnceLock::new();
let re = MEDIUMBLOB_RE.get_or_init(|| init_regex(r"(?i)\bMEDIUMBLOB\b"));
s = re.replace_all(&s, "BYTEA").to_string();
static TINYBLOB_RE: OnceLock<Regex> = OnceLock::new();
let re = TINYBLOB_RE.get_or_init(|| init_regex(r"(?i)\bTINYBLOB\b"));
s = re.replace_all(&s, "BYTEA").to_string();
static DATETIME_RE: OnceLock<Regex> = OnceLock::new();
let re = DATETIME_RE.get_or_init(|| init_regex(r"(?i)\bDATETIME\b"));
s = re.replace_all(&s, "TIMESTAMP").to_string();
static YEAR_RE: OnceLock<Regex> = OnceLock::new();
let re = YEAR_RE.get_or_init(|| init_regex(r"(?i)\bYEAR\b"));
s = re.replace_all(&s, "SMALLINT").to_string();
static INT_WIDTH_RE: OnceLock<Regex> = OnceLock::new();
let re = INT_WIDTH_RE
.get_or_init(|| init_regex(r"(?i)\b(INT|BIGINT|INTEGER|SMALLINT)\s*\(\s*\d+\s*\)"));
s = re.replace_all(&s, "$1").to_string();
static INT_UNSIGNED_RE: OnceLock<Regex> = OnceLock::new();
let re = INT_UNSIGNED_RE.get_or_init(|| init_regex(r"(?i)\bINT\s+UNSIGNED\b"));
s = re.replace_all(&s, "BIGINT").to_string();
static UNSIGNED_RE: OnceLock<Regex> = OnceLock::new();
let re = UNSIGNED_RE.get_or_init(|| init_regex(r"(?i)\s+UNSIGNED\b"));
s = re.replace_all(&s, "").to_string();
static DOUBLE_RE: OnceLock<Regex> = OnceLock::new();
let re = DOUBLE_RE.get_or_init(|| init_regex(r"(?i)\bDOUBLE\b"));
s = re.replace_all(&s, "DOUBLE PRECISION").to_string();
static DOUBLE_FIX_RE: OnceLock<Regex> = OnceLock::new();
let re = DOUBLE_FIX_RE
.get_or_init(|| init_regex(r"(?i)\bDOUBLE\s+PRECISION\s+PRECISION\b"));
s = re.replace_all(&s, "DOUBLE PRECISION").to_string();
static FLOAT_RE: OnceLock<Regex> = OnceLock::new();
let re = FLOAT_RE.get_or_init(|| init_regex(r"(?i)\bFLOAT\s*\(\s*\d+\s*\)"));
s = re.replace_all(&s, "REAL").to_string();
static ENUM_RE: OnceLock<Regex> = OnceLock::new();
let re = ENUM_RE.get_or_init(|| init_regex(r"(?i)\bENUM\s*\([^)]+\)"));
s = re.replace_all(&s, "TEXT").to_string();
s
}
fn translate_auto_increment(sql: &str) -> String {
static BIGINT_AI_RE: OnceLock<Regex> = OnceLock::new();
let re = BIGINT_AI_RE.get_or_init(|| {
init_regex(r"(?i)\bBIGINT\b([^,)]*?)\s+AUTO_INCREMENT\b")
});
let mut s = re.replace_all(sql, "BIGSERIAL$1").to_string();
static INT_AI_RE: OnceLock<Regex> = OnceLock::new();
let re = INT_AI_RE.get_or_init(|| {
init_regex(r"(?i)\b(?:INT|INTEGER)\b([^,)]*?)\s+AUTO_INCREMENT\b")
});
s = re.replace_all(&s, "SERIAL$1").to_string();
static LEFTOVER_AI_RE: OnceLock<Regex> = OnceLock::new();
let re = LEFTOVER_AI_RE.get_or_init(|| init_regex(r"(?i)\s*AUTO_INCREMENT\b"));
s = re.replace_all(&s, "").to_string();
s
}
fn translate_charset_collation(sql: &str) -> String {
let mut s = sql.to_string();
static DEFAULT_CHARSET_LONG_RE: OnceLock<Regex> = OnceLock::new();
let re = DEFAULT_CHARSET_LONG_RE
.get_or_init(|| init_regex(r"(?i)\s*DEFAULT\s+CHARACTER\s+SET\s*=?\s*\w+"));
s = re.replace_all(&s, "").to_string();
static DEFAULT_CHARSET_RE: OnceLock<Regex> = OnceLock::new();
let re =
DEFAULT_CHARSET_RE.get_or_init(|| init_regex(r"(?i)\s*DEFAULT\s+CHARSET\s*=?\s*\w+"));
s = re.replace_all(&s, "").to_string();
static CHARSET_LONG_RE: OnceLock<Regex> = OnceLock::new();
let re = CHARSET_LONG_RE.get_or_init(|| init_regex(r"(?i)\s*CHARACTER\s+SET\s*=?\s*\w+"));
s = re.replace_all(&s, "").to_string();
static CHARSET_SHORT_RE: OnceLock<Regex> = OnceLock::new();
let re = CHARSET_SHORT_RE.get_or_init(|| init_regex(r"(?i)\s*CHARSET\s*=?\s*\w+"));
s = re.replace_all(&s, "").to_string();
static COLLATE_RE: OnceLock<Regex> = OnceLock::new();
let re = COLLATE_RE.get_or_init(|| init_regex(r"(?i)\s*COLLATE\s*=?\s*\w+"));
s = re.replace_all(&s, "").to_string();
static ENGINE_RE: OnceLock<Regex> = OnceLock::new();
let re = ENGINE_RE.get_or_init(|| init_regex(r"(?i)\s*ENGINE\s*=\s*\w+"));
s = re.replace_all(&s, "").to_string();
s
}
fn translate_on_duplicate_key(sql: &str) -> String {
static ODK_RE: OnceLock<Regex> = OnceLock::new();
let re = ODK_RE.get_or_init(|| {
init_regex(r"(?i)\s+ON\s+DUPLICATE\s+KEY\s+UPDATE\s+(.+)$")
});
if let Some(caps) = re.captures(sql) {
let prefix = &sql[..caps.get(0).map_or(0, |m| m.start())];
let set_clause = caps.get(1).map_or("", |m| m.as_str());
let pg_set = translate_values_refs(set_clause);
format!("{prefix} ON CONFLICT DO UPDATE SET {pg_set}")
} else {
sql.to_string()
}
}
fn translate_values_refs(clause: &str) -> String {
static VALUES_REF_RE: OnceLock<Regex> = OnceLock::new();
let re = VALUES_REF_RE.get_or_init(|| init_regex(r"(?i)\bVALUES\s*\(\s*(\w+)\s*\)"));
re.replace_all(clause, "EXCLUDED.$1").to_string()
}
fn translate_replace_into(sql: &str) -> String {
static REPLACE_RE: OnceLock<Regex> = OnceLock::new();
let re = REPLACE_RE.get_or_init(|| init_regex(r"(?i)\bREPLACE\s+INTO\b"));
re.replace_all(sql, "INSERT INTO").to_string()
}
fn translate_insert_ignore(sql: &str) -> String {
static IGNORE_RE: OnceLock<Regex> = OnceLock::new();
let re = IGNORE_RE.get_or_init(|| init_regex(r"(?i)\bINSERT\s+IGNORE\s+INTO\b"));
if re.is_match(sql) {
let without_ignore = re.replace_all(sql, "INSERT INTO").to_string();
format!("{without_ignore} ON CONFLICT DO NOTHING")
} else {
sql.to_string()
}
}
fn translate_limit_offset(sql: &str) -> String {
static LIMIT_RE: OnceLock<Regex> = OnceLock::new();
let re = LIMIT_RE.get_or_init(|| init_regex(r"(?i)\bLIMIT\s+(\d+)\s*,\s*(\d+)"));
re.replace_all(sql, "LIMIT $2 OFFSET $1").to_string()
}
fn translate_functions(sql: &str) -> String {
let mut s = sql.to_string();
static GC_SEP_RE: OnceLock<Regex> = OnceLock::new();
let re = GC_SEP_RE
.get_or_init(|| init_regex(r"(?i)\bGROUP_CONCAT\s*\((.+?)\s+SEPARATOR\s+'([^']*)'\)"));
s = re.replace_all(&s, "STRING_AGG($1, '$2')").to_string();
static GC_RE: OnceLock<Regex> = OnceLock::new();
let re = GC_RE.get_or_init(|| init_regex(r"(?i)\bGROUP_CONCAT\s*\(([^)]+)\)"));
s = re.replace_all(&s, "STRING_AGG($1, ',')").to_string();
static IFNULL_RE: OnceLock<Regex> = OnceLock::new();
let re = IFNULL_RE.get_or_init(|| init_regex(r"(?i)\bIFNULL\s*\("));
s = re.replace_all(&s, "COALESCE(").to_string();
s = translate_if_function(&s);
static LOCATE_RE: OnceLock<Regex> = OnceLock::new();
let re = LOCATE_RE.get_or_init(|| init_regex(r"(?i)\bLOCATE\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)"));
s = re.replace_all(&s, "POSITION($1 IN $2)").to_string();
static INSTR_RE: OnceLock<Regex> = OnceLock::new();
let re = INSTR_RE.get_or_init(|| init_regex(r"(?i)\bINSTR\s*\(\s*([^,]+?)\s*,\s*([^)]+?)\s*\)"));
s = re.replace_all(&s, "POSITION($2 IN $1)").to_string();
s
}
fn translate_if_function(sql: &str) -> String {
static IF_PREFIX_RE: OnceLock<Regex> = OnceLock::new();
let re = IF_PREFIX_RE.get_or_init(|| init_regex(r"(?i)\bIF\s*\("));
let mut result = String::with_capacity(sql.len());
let mut remaining = sql;
while let Some(m) = re.find(remaining) {
result.push_str(&remaining[..m.start()]);
let after_open = &remaining[m.end()..];
if let Some((args, rest)) = extract_balanced_args(after_open) {
let parts = split_top_level_commas(&args);
if parts.len() == 3 {
result.push_str("CASE WHEN ");
result.push_str(parts[0].trim());
result.push_str(" THEN ");
result.push_str(parts[1].trim());
result.push_str(" ELSE ");
result.push_str(parts[2].trim());
result.push_str(" END");
remaining = rest;
} else {
result.push_str(m.as_str());
remaining = after_open;
}
} else {
result.push_str(m.as_str());
remaining = after_open;
}
}
result.push_str(remaining);
result
}
fn extract_balanced_args(s: &str) -> Option<(&str, &str)> {
let mut depth: u32 = 1;
let mut in_single_quote = false;
for (i, ch) in s.char_indices() {
if in_single_quote {
if ch == '\'' {
in_single_quote = false;
}
continue;
}
match ch {
'\'' => in_single_quote = true,
'(' => depth += 1,
')' => {
depth -= 1;
if depth == 0 {
return Some((&s[..i], &s[i + 1..]));
}
}
_ => {}
}
}
None
}
fn split_top_level_commas(s: &str) -> Vec<&str> {
let mut parts = Vec::new();
let mut depth: u32 = 0;
let mut in_single_quote = false;
let mut start = 0;
for (i, ch) in s.char_indices() {
if in_single_quote {
if ch == '\'' {
in_single_quote = false;
}
continue;
}
match ch {
'\'' => in_single_quote = true,
'(' => depth += 1,
')' => {
if depth > 0 {
depth -= 1;
}
}
',' if depth == 0 => {
parts.push(&s[start..i]);
start = i + 1;
}
_ => {}
}
}
parts.push(&s[start..]);
parts
}
fn translate_multi_table_delete(sql: &str) -> String {
static MULTI_DEL_RE: OnceLock<Regex> = OnceLock::new();
let re = MULTI_DEL_RE.get_or_init(|| {
init_regex(
r"(?i)^(\s*DELETE)\s+\w+(?:\s*,\s*\w+)*\s+FROM\s+(\w+)\s+(?:AS\s+)?(\w+)\s+((?:INNER\s+)?JOIN\s+(\w+)\s+(?:AS\s+)?(\w+)\s+ON\s+(.+?))\s+(WHERE\s+.+)$"
)
});
if let Some(caps) = re.captures(sql) {
let table1 = &caps[2];
let alias1 = &caps[3];
let table2 = &caps[5];
let alias2 = &caps[6];
let on_condition = &caps[7];
let where_clause = &caps[8];
let where_body = where_clause.trim()
.strip_prefix("WHERE ")
.or_else(|| where_clause.trim().strip_prefix("where "))
.unwrap_or(where_clause.trim());
let subquery_from = format!(
"{table1} AS {alias1} INNER JOIN {table2} AS {alias2} ON {on_condition}"
);
let subquery_where = where_body;
let (col1, col2) = extract_join_columns(on_condition, alias1, alias2);
let del1 = format!(
"DELETE FROM {table1} WHERE {col1} IN (SELECT {alias1}.{col1} FROM {subquery_from} WHERE {subquery_where})"
);
let del2 = format!(
"DELETE FROM {table2} WHERE {col2} IN (SELECT {alias2}.{col2} FROM {subquery_from} WHERE {subquery_where})"
);
return format!("{del1};{del2}");
}
static COMMA_DEL_RE: OnceLock<Regex> = OnceLock::new();
let comma_re = COMMA_DEL_RE.get_or_init(|| {
init_regex(
r"(?is)^\s*DELETE\s+(\w+)\s*,\s*(\w+)\s+FROM\s+(\w+)\s+(?:AS\s+)?(\w+)\s*,\s*(\w+)\s+(?:AS\s+)?(\w+)\s+(WHERE\s+.+)$"
)
});
if let Some(caps) = comma_re.captures(sql) {
let del_alias1 = &caps[1]; let del_alias2 = &caps[2];
let table1 = &caps[3];
let alias1 = &caps[4];
let table2 = &caps[5];
let alias2 = &caps[6];
let where_clause = caps[7].trim();
let where_body = where_clause.strip_prefix("WHERE ").or_else(|| where_clause.strip_prefix("where ")).unwrap_or(where_clause);
let pk_col = "option_id";
let subquery_from = format!("{table1} AS {alias1}, {table2} AS {alias2}");
let del1 = format!(
"DELETE FROM {table1} WHERE {pk_col} IN (SELECT {del_alias1}.{pk_col} FROM {subquery_from} WHERE {where_body})"
);
if table1 != table2 {
let del2 = format!(
"DELETE FROM {table2} WHERE {pk_col} IN (SELECT {del_alias2}.{pk_col} FROM {subquery_from} WHERE {where_body})"
);
return format!("{del1};{del2}");
}
return del1;
}
sql.to_string()
}
fn extract_join_columns(on_condition: &str, alias1: &str, alias2: &str) -> (String, String) {
let (mut col1, mut col2) = (String::from("id"), String::from("id"));
if let Some((lhs, rhs)) = on_condition.split_once('=') {
let lhs = lhs.trim();
let rhs = rhs.trim();
for token in &[lhs, rhs] {
if let Some(dot) = token.find('.') {
let prefix = token.get(..dot).unwrap_or("").trim();
let suffix = token.get(dot + 1..).unwrap_or("id").trim();
if prefix.eq_ignore_ascii_case(alias1) {
col1 = suffix.to_string();
} else if prefix.eq_ignore_ascii_case(alias2) {
col2 = suffix.to_string();
}
}
}
}
(col1, col2)
}
fn translate_key_indexes(sql: &str) -> String {
static CREATE_RE: OnceLock<Regex> = OnceLock::new();
let re = CREATE_RE.get_or_init(|| init_regex(r"(?i)^\s*CREATE\s+TABLE\b"));
if !re.is_match(sql) {
return sql.to_string();
}
static UNIQUE_KEY_RE: OnceLock<Regex> = OnceLock::new();
let re = UNIQUE_KEY_RE.get_or_init(|| {
init_regex(r"(?im),\s*UNIQUE\s+KEY\s+\w+\s*\(((?:[^()]*\([^)]*\))*[^)]*)\)")
});
let mut s = re.replace_all(sql, |caps: ®ex::Captures<'_>| {
let col_list = &caps[1];
let clean_cols = strip_prefix_lengths(col_list);
format!(", UNIQUE({})", clean_cols)
}).to_string();
static KEY_LINE_RE: OnceLock<Regex> = OnceLock::new();
let re = KEY_LINE_RE.get_or_init(|| {
init_regex(r"(?im),\s*KEY\s+\w+\s*\((?:[^()]*\([^)]*\))*[^)]*\)")
});
s = re.replace_all(&s, "").to_string();
static TRAILING_COMMA_RE: OnceLock<Regex> = OnceLock::new();
let re = TRAILING_COMMA_RE.get_or_init(|| init_regex(r",\s*\)"));
s = re.replace_all(&s, ")").to_string();
s
}
fn strip_prefix_lengths(col_list: &str) -> String {
static PREFIX_LEN_RE: OnceLock<Regex> = OnceLock::new();
let re = PREFIX_LEN_RE.get_or_init(|| init_regex(r"\(\d+\)"));
re.replace_all(col_list, "").to_string()
}
fn translate_alter_table_keys(sql: &str) -> String {
let upper = sql.trim().to_uppercase();
if !upper.starts_with("ALTER TABLE") {
return sql.to_string();
}
static ALTER_KEY_RE: OnceLock<Regex> = OnceLock::new();
let re = ALTER_KEY_RE.get_or_init(|| {
init_regex(r"(?i)\bADD\s+(?:UNIQUE\s+)?(?:KEY|INDEX)\s+\w+\s*\((?:[^()]*\([^)]*\))*[^)]*\)")
});
if re.is_match(sql) {
return String::new();
}
sql.to_string()
}
fn translate_misc(sql: &str) -> String {
let mut s = sql.to_string();
static EXEC_COMMENT_RE: OnceLock<Regex> = OnceLock::new();
let re = EXEC_COMMENT_RE.get_or_init(|| init_regex(r"/\*![\s\S]*?\*/"));
s = re.replace_all(&s, "").to_string();
static STRAIGHT_JOIN_RE: OnceLock<Regex> = OnceLock::new();
let re = STRAIGHT_JOIN_RE.get_or_init(|| init_regex(r"(?i)\bSTRAIGHT_JOIN\b"));
s = re.replace_all(&s, "JOIN").to_string();
static CALC_FOUND_RE: OnceLock<Regex> = OnceLock::new();
let re = CALC_FOUND_RE.get_or_init(|| init_regex(r"(?i)\bSQL_CALC_FOUND_ROWS\s*"));
s = re.replace_all(&s, "").to_string();
static PRIORITY_RE: OnceLock<Regex> = OnceLock::new();
let re =
PRIORITY_RE.get_or_init(|| init_regex(r"(?i)\b(?:HIGH_PRIORITY|LOW_PRIORITY|DELAYED)\b"));
s = re.replace_all(&s, "").to_string();
static BINARY_RE: OnceLock<Regex> = OnceLock::new();
let re = BINARY_RE.get_or_init(|| init_regex(r"(?i)\bBINARY\s+(')"));
s = re.replace_all(&s, "$1").to_string();
static DATE_INTERVAL_RE: OnceLock<Regex> = OnceLock::new();
let re = DATE_INTERVAL_RE.get_or_init(|| {
init_regex(r"(?i)\b(DATE_(?:ADD|SUB))\s*\(\s*([^,]+)\s*,\s*INTERVAL\s+(\d+)\s+(\w+)\s*\)")
});
s = re.replace_all(&s, "$1($2, '$3 $4')").to_string();
static WHERE_1_1_AND_RE: OnceLock<Regex> = OnceLock::new();
let re = WHERE_1_1_AND_RE.get_or_init(|| init_regex(r"(?i)\bWHERE\s+1\s*=\s*1\s+AND\b"));
s = re.replace_all(&s, "WHERE").to_string();
static WHERE_1_1_RE: OnceLock<Regex> = OnceLock::new();
let re = WHERE_1_1_RE.get_or_init(|| init_regex(r"(?i)\bWHERE\s+1\s*=\s*1\s*$"));
s = re.replace_all(&s, "").to_string();
s
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_auto_increment() {
let sql =
"CREATE TABLE wp_posts (ID bigint(20) NOT NULL AUTO_INCREMENT, PRIMARY KEY (ID))";
let result = translate(sql);
assert!(
result.contains("BIGSERIAL"),
"Should convert to BIGSERIAL: {result}"
);
assert!(
!result.contains("AUTO_INCREMENT"),
"Should remove AUTO_INCREMENT: {result}"
);
}
#[test]
fn test_on_duplicate_key() {
let sql = "INSERT INTO wp_options (option_name, option_value) VALUES ('siteurl', 'http://example.com') ON DUPLICATE KEY UPDATE option_value = VALUES(option_value)";
let result = translate(sql);
assert!(
!result.contains("ON DUPLICATE KEY"),
"Should strip ON DUPLICATE KEY UPDATE: {result}"
);
assert!(
result.contains("ON CONFLICT DO UPDATE SET"),
"Should produce ON CONFLICT DO UPDATE SET: {result}"
);
assert!(
result.contains("EXCLUDED.option_value"),
"Should translate VALUES(col) to EXCLUDED.col: {result}"
);
}
#[test]
fn test_mysql_types() {
let sql = "CREATE TABLE t (a LONGTEXT, b MEDIUMTEXT, c TINYINT(1), d BIGINT(20) UNSIGNED, e DATETIME)";
let result = translate(sql);
assert!(result.contains("TEXT"), "LONGTEXT -> TEXT");
assert!(result.contains("BOOLEAN"), "TINYINT(1) -> BOOLEAN");
assert!(result.contains("TIMESTAMP"), "DATETIME -> TIMESTAMP");
assert!(!result.contains("UNSIGNED"), "UNSIGNED should be stripped");
}
#[test]
fn test_charset_stripped() {
let sql = "CREATE TABLE t (id INT) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci";
let result = translate(sql);
assert!(!result.contains("ENGINE"), "ENGINE should be stripped");
assert!(!result.contains("CHARSET"), "CHARSET should be stripped");
assert!(!result.contains("COLLATE"), "COLLATE should be stripped");
}
#[test]
fn test_backtick_stripped() {
let sql = "SELECT `id`, `name` FROM `wp_posts` WHERE `status` = 'publish'";
let result = translate(sql);
assert!(!result.contains('`'), "Backticks should be stripped");
assert!(result.contains("id"), "Identifier should remain");
assert!(result.contains("wp_posts"), "Table name should remain");
assert!(
result.contains("'publish'"),
"String literals should be preserved"
);
}
#[test]
fn test_insert_ignore() {
let sql = "INSERT IGNORE INTO t (id, name) VALUES (1, 'test')";
let result = translate(sql);
assert!(
result.contains("ON CONFLICT DO NOTHING"),
"INSERT IGNORE -> ON CONFLICT DO NOTHING"
);
assert!(!result.contains("IGNORE"), "IGNORE should be removed");
}
#[test]
fn test_limit_offset_mysql() {
let sql = "SELECT * FROM t LIMIT 10, 20";
let result = translate(sql);
assert!(
result.contains("LIMIT 20 OFFSET 10"),
"Should swap LIMIT args: {result}"
);
}
#[test]
fn test_limit_single_arg_unchanged() {
let sql = "SELECT * FROM t LIMIT 10";
let result = translate(sql);
assert!(
result.contains("LIMIT 10"),
"Single LIMIT should be unchanged"
);
}
#[test]
fn test_replace_into() {
let sql = "REPLACE INTO t (id, name) VALUES (1, 'foo')";
let result = translate(sql);
assert!(
result.starts_with("INSERT INTO"),
"REPLACE INTO -> INSERT INTO: {result}"
);
assert!(
!result.contains("REPLACE"),
"REPLACE should be removed: {result}"
);
}
#[test]
fn test_group_concat() {
let sql = "SELECT GROUP_CONCAT(name) FROM t";
let result = translate(sql);
assert!(
result.contains("STRING_AGG(name, ',')"),
"GROUP_CONCAT -> STRING_AGG: {result}"
);
}
#[test]
fn test_group_concat_separator() {
let sql = "SELECT GROUP_CONCAT(name SEPARATOR ';') FROM t";
let result = translate(sql);
assert!(
result.contains("STRING_AGG(name, ';')"),
"GROUP_CONCAT with SEPARATOR: {result}"
);
}
#[test]
fn test_ifnull() {
let sql = "SELECT IFNULL(name, 'unknown') FROM t";
let result = translate(sql);
assert!(
result.contains("COALESCE(name, 'unknown')"),
"IFNULL -> COALESCE: {result}"
);
}
#[test]
fn test_if_function() {
let sql = "SELECT IF(a > 0, 'pos', 'neg') FROM t";
let result = translate(sql);
assert!(
result.contains("CASE WHEN a > 0 THEN 'pos' ELSE 'neg' END"),
"IF -> CASE WHEN: {result}"
);
}
#[test]
fn test_locate() {
let sql = "SELECT LOCATE('bar', col1) FROM t";
let result = translate(sql);
assert!(
result.contains("POSITION('bar' IN col1)"),
"LOCATE -> POSITION: {result}"
);
}
#[test]
fn test_instr() {
let sql = "SELECT INSTR(col1, 'bar') FROM t";
let result = translate(sql);
assert!(
result.contains("POSITION('bar' IN col1)"),
"INSTR -> POSITION: {result}"
);
}
#[test]
fn test_straight_join() {
let sql = "SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.id = t2.id";
let result = translate(sql);
assert!(
result.contains("JOIN") && !result.contains("STRAIGHT_JOIN"),
"STRAIGHT_JOIN -> JOIN: {result}"
);
}
#[test]
fn test_sql_calc_found_rows() {
let sql = "SELECT SQL_CALC_FOUND_ROWS * FROM t LIMIT 10";
let result = translate(sql);
assert!(
!result.contains("SQL_CALC_FOUND_ROWS"),
"SQL_CALC_FOUND_ROWS should be stripped: {result}"
);
}
#[test]
fn test_int_unsigned_promotion() {
let sql = "CREATE TABLE t (id INT UNSIGNED)";
let result = translate(sql);
assert!(
result.contains("BIGINT"),
"INT UNSIGNED -> BIGINT: {result}"
);
assert!(
!result.contains("UNSIGNED"),
"UNSIGNED should be stripped: {result}"
);
}
#[test]
fn test_double_to_double_precision() {
let sql = "CREATE TABLE t (val DOUBLE)";
let result = translate(sql);
assert!(
result.contains("DOUBLE PRECISION"),
"DOUBLE -> DOUBLE PRECISION: {result}"
);
}
#[test]
fn test_float_precision_stripped() {
let sql = "CREATE TABLE t (val FLOAT(10))";
let result = translate(sql);
assert!(result.contains("REAL"), "FLOAT(N) -> REAL: {result}");
}
#[test]
fn test_enum_to_text() {
let sql = "CREATE TABLE t (status ENUM('active','inactive'))";
let result = translate(sql);
assert!(result.contains("TEXT"), "ENUM -> TEXT: {result}");
assert!(
!result.contains("ENUM"),
"ENUM should be removed: {result}"
);
}
#[test]
fn test_serial_auto_increment() {
let sql = "CREATE TABLE t (id INT NOT NULL AUTO_INCREMENT)";
let result = translate(sql);
assert!(
result.contains("SERIAL"),
"INT AUTO_INCREMENT -> SERIAL: {result}"
);
assert!(
!result.contains("AUTO_INCREMENT"),
"AUTO_INCREMENT should be removed: {result}"
);
}
#[test]
fn test_year_to_smallint() {
let sql = "CREATE TABLE t (yr YEAR)";
let result = translate(sql);
assert!(
result.contains("SMALLINT"),
"YEAR -> SMALLINT: {result}"
);
}
#[test]
fn test_executable_comments_stripped() {
let sql = "SELECT /*!40001 SQL_NO_CACHE */ * FROM t";
let result = translate(sql);
assert!(
!result.contains("/*!"),
"Executable comments should be stripped: {result}"
);
}
#[test]
fn test_high_priority_stripped() {
let sql = "SELECT HIGH_PRIORITY * FROM t";
let result = translate(sql);
assert!(
!result.contains("HIGH_PRIORITY"),
"HIGH_PRIORITY should be stripped: {result}"
);
}
#[test]
fn test_complex_wordpress_create() {
let sql = "CREATE TABLE `wp_options` (
`option_id` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
`option_name` varchar(191) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
`option_value` longtext COLLATE utf8mb4_unicode_ci NOT NULL,
`autoload` varchar(20) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT 'yes',
PRIMARY KEY (`option_id`),
UNIQUE KEY `option_name` (`option_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci";
let result = translate(sql);
assert!(!result.contains('`'), "Backticks should be replaced");
assert!(
result.contains("BIGSERIAL"),
"BIGINT UNSIGNED AUTO_INCREMENT -> BIGSERIAL: {result}"
);
assert!(result.contains("TEXT"), "LONGTEXT -> TEXT");
assert!(!result.contains("ENGINE"), "ENGINE stripped");
assert!(!result.contains("CHARSET"), "CHARSET stripped");
assert!(!result.contains("COLLATE"), "COLLATE stripped");
assert!(
result.contains("UNIQUE(option_name)"),
"UNIQUE KEY should become UNIQUE(option_name): {result}"
);
}
#[test]
fn test_case_insensitive_types() {
let sql = "CREATE TABLE t (a longtext, b datetime, c tinyint(1))";
let result = translate(sql);
assert!(result.contains("TEXT"), "lowercase LONGTEXT -> TEXT");
assert!(
result.contains("TIMESTAMP"),
"lowercase DATETIME -> TIMESTAMP"
);
assert!(
result.contains("BOOLEAN"),
"lowercase TINYINT(1) -> BOOLEAN"
);
}
#[test]
fn test_backtick_preserves_string_contents() {
let sql = "SELECT `col` FROM t WHERE val = 'it`s a test'";
let result = translate(sql);
assert!(
result.contains("col"),
"Backtick identifier stripped: {result}"
);
assert!(
result.contains("'it`s a test'"),
"Backtick inside string literal preserved: {result}"
);
assert!(
!result.contains("`col`"),
"Identifier backticks should be stripped: {result}"
);
}
#[test]
fn test_limit_offset_no_false_positive() {
let sql = "SELECT * FROM t LIMIT 5";
let result = translate(sql);
assert_eq!(
result.contains("OFFSET"),
false,
"Single LIMIT must not gain OFFSET: {result}"
);
}
#[test]
fn test_mediumint() {
let sql = "CREATE TABLE t (id MEDIUMINT)";
let result = translate(sql);
assert!(
result.contains("INTEGER"),
"MEDIUMINT -> INTEGER: {result}"
);
}
#[test]
fn test_blob_types() {
let sql = "CREATE TABLE t (a LONGBLOB, b MEDIUMBLOB, c TINYBLOB)";
let result = translate(sql);
let count = result.matches("BYTEA").count();
assert_eq!(count, 3, "All BLOB variants -> BYTEA: {result}");
}
#[test]
fn test_multi_table_delete() {
let sql = "DELETE t, tt FROM wp_terms AS t INNER JOIN wp_term_taxonomy AS tt ON t.term_id = tt.term_id WHERE tt.taxonomy = 'nav_menu'";
let result = translate(sql);
let parts: Vec<&str> = result.split(';').collect();
assert!(
parts.len() >= 2,
"Should produce two DELETE statements: {result}"
);
assert!(
parts[0].contains("DELETE FROM wp_terms WHERE term_id IN"),
"First DELETE should target wp_terms with IN subquery: {result}"
);
assert!(
parts[1].contains("DELETE FROM wp_term_taxonomy WHERE term_id IN"),
"Second DELETE should target wp_term_taxonomy with IN subquery: {result}"
);
assert!(
result.contains("INNER JOIN"),
"Subquery should contain JOIN: {result}"
);
}
#[test]
fn test_multi_table_delete_no_alias_keyword() {
let sql = "DELETE a, b FROM wp_terms a JOIN wp_term_taxonomy b ON a.term_id = b.term_id WHERE b.count = 0";
let result = translate(sql);
let parts: Vec<&str> = result.split(';').collect();
assert!(
parts.len() >= 2,
"Should handle JOIN without AS keyword: {result}"
);
assert!(
parts[0].contains("DELETE FROM wp_terms"),
"First DELETE should target wp_terms: {result}"
);
}
#[test]
fn test_key_index_stripped() {
let sql = "CREATE TABLE wp_options (
option_id BIGSERIAL NOT NULL,
option_name varchar(191) NOT NULL DEFAULT '',
PRIMARY KEY (option_id),
KEY option_name (option_name(191))
)";
let result = translate(sql);
assert!(
!result.contains("KEY option_name"),
"KEY index should be stripped: {result}"
);
assert!(
result.contains("PRIMARY KEY"),
"PRIMARY KEY should be preserved: {result}"
);
}
#[test]
fn test_unique_key_converted() {
let sql = "CREATE TABLE wp_users (
ID BIGSERIAL NOT NULL,
user_email varchar(100),
PRIMARY KEY (ID),
UNIQUE KEY user_email (user_email),
KEY user_login (user_login)
)";
let result = translate(sql);
assert!(
!result.contains("UNIQUE KEY"),
"UNIQUE KEY syntax should be removed: {result}"
);
assert!(
result.contains("UNIQUE(user_email)"),
"UNIQUE KEY should be converted to UNIQUE constraint: {result}"
);
assert!(
!result.contains("KEY user_login"),
"Plain KEY should be stripped: {result}"
);
assert!(
result.contains("PRIMARY KEY"),
"PRIMARY KEY should be preserved: {result}"
);
}
#[test]
fn test_key_index_no_false_positive() {
let sql = "SELECT * FROM t WHERE KEY = 'value'";
let result = translate(sql);
assert_eq!(result, sql, "Non-CREATE should be unchanged: {result}");
}
#[test]
fn test_wordpress_full_create_with_keys() {
let sql = "CREATE TABLE `wp_posts` (
`ID` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
`post_title` text COLLATE utf8mb4_unicode_ci NOT NULL,
PRIMARY KEY (`ID`),
KEY `post_name` (`post_name`(191)),
KEY `type_status_date` (`post_type`,`post_status`,`post_date`,`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci";
let result = translate(sql);
assert!(
!result.contains("KEY post_name"),
"KEY indexes should be stripped: {result}"
);
assert!(
!result.contains("KEY type_status_date"),
"Composite KEY indexes should be stripped: {result}"
);
assert!(
result.contains("PRIMARY KEY"),
"PRIMARY KEY should be preserved: {result}"
);
assert!(
result.contains("BIGSERIAL"),
"AUTO_INCREMENT should become BIGSERIAL: {result}"
);
}
}