pub mod copy_to_insert;
mod types;
mod warnings;
#[allow(unused_imports)]
pub use copy_to_insert::{
copy_to_inserts, parse_copy_data, parse_copy_header, CopyHeader, CopyValue,
};
use crate::parser::{Parser, SqlDialect, StatementType};
use crate::progress::ProgressReader;
use crate::splitter::Compression;
use indicatif::{ProgressBar, ProgressStyle};
use std::fs::File;
use std::io::{BufWriter, Read, Write};
use std::path::PathBuf;
pub use types::TypeMapper;
pub use warnings::{print_warnings_summary, ConvertWarning, WarningCollector};
#[derive(Debug)]
pub struct ConvertConfig {
pub input: PathBuf,
pub output: Option<PathBuf>,
pub from_dialect: Option<SqlDialect>,
pub to_dialect: SqlDialect,
pub dry_run: bool,
pub progress: bool,
pub strict: bool,
}
impl Default for ConvertConfig {
fn default() -> Self {
Self {
input: PathBuf::new(),
output: None,
from_dialect: None,
to_dialect: SqlDialect::Postgres,
dry_run: false,
progress: false,
strict: false,
}
}
}
#[derive(Debug, Default, serde::Serialize)]
pub struct ConvertStats {
pub statements_processed: u64,
pub statements_converted: u64,
pub statements_unchanged: u64,
pub statements_skipped: u64,
pub warnings: Vec<ConvertWarning>,
}
pub struct Converter {
from: SqlDialect,
to: SqlDialect,
warnings: WarningCollector,
strict: bool,
pending_copy_header: Option<CopyHeader>,
}
impl Converter {
pub fn new(from: SqlDialect, to: SqlDialect) -> Self {
Self {
from,
to,
warnings: WarningCollector::new(),
strict: false,
pending_copy_header: None,
}
}
pub fn with_strict(mut self, strict: bool) -> Self {
self.strict = strict;
self
}
pub fn has_pending_copy(&self) -> bool {
self.pending_copy_header.is_some()
}
pub fn process_copy_data(&mut self, data: &[u8]) -> Result<Vec<Vec<u8>>, ConvertWarning> {
if let Some(header) = self.pending_copy_header.take() {
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
let inserts = copy_to_inserts(&header, data, self.to);
return Ok(inserts);
}
}
Ok(vec![data.to_vec()])
}
pub fn convert_statement(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
let (stmt_type, table_name) =
Parser::<&[u8]>::parse_statement_with_dialect(stmt, self.from);
let table = if table_name.is_empty() {
None
} else {
Some(table_name.as_str())
};
match stmt_type {
StatementType::CreateTable => self.convert_create_table(stmt, table),
StatementType::Insert => self.convert_insert(stmt, table),
StatementType::CreateIndex => self.convert_create_index(stmt),
StatementType::AlterTable => self.convert_alter_table(stmt),
StatementType::DropTable => self.convert_drop_table(stmt),
StatementType::Copy => self.convert_copy(stmt, table),
StatementType::Unknown => self.convert_other(stmt),
}
}
fn convert_create_table(
&mut self,
stmt: &[u8],
table_name: Option<&str>,
) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let mut result = stmt_str.to_string();
self.detect_unsupported_features(&result, table_name)?;
if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
result = self.strip_mssql_schema_prefix(&result);
result = self.convert_mssql_getdate(&result);
result = self.strip_mssql_on_filegroup(&result);
result = self.strip_mssql_clustered(&result);
result = self.convert_mssql_unicode_strings(&result);
}
result = self.convert_identifiers(&result);
result = self.convert_data_types(&result);
result = self.convert_auto_increment(&result, table_name);
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
result = self.strip_postgres_casts(&result);
result = self.convert_nextval(&result);
result = self.convert_default_now(&result);
result = self.strip_schema_prefix(&result);
}
result = self.convert_string_escapes(&result);
result = self.strip_conditional_comments(&result);
result = self.strip_engine_clause(&result);
result = self.strip_charset_clauses(&result);
Ok(result.into_bytes())
}
fn convert_insert(
&mut self,
stmt: &[u8],
_table_name: Option<&str>,
) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let mut result = stmt_str.to_string();
if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
result = self.strip_mssql_schema_prefix(&result);
result = self.convert_mssql_unicode_strings(&result);
}
result = self.convert_identifiers(&result);
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
result = self.strip_postgres_casts(&result);
result = self.strip_schema_prefix(&result);
}
result = self.convert_string_escapes(&result);
Ok(result.into_bytes())
}
fn convert_create_index(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let mut result = stmt_str.to_string();
if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
result = self.strip_mssql_schema_prefix(&result);
result = self.strip_mssql_clustered(&result);
}
result = self.convert_identifiers(&result);
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
result = self.strip_postgres_casts(&result);
result = self.strip_schema_prefix(&result);
}
if result.contains("FULLTEXT") || result.contains("fulltext") {
self.warnings.add(ConvertWarning::UnsupportedFeature {
feature: "FULLTEXT INDEX".to_string(),
suggestion: Some("Use PostgreSQL GIN index or skip".to_string()),
});
if self.strict {
return Err(ConvertWarning::UnsupportedFeature {
feature: "FULLTEXT INDEX".to_string(),
suggestion: None,
});
}
}
Ok(result.into_bytes())
}
fn convert_alter_table(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let mut result = stmt_str.to_string();
result = self.convert_identifiers(&result);
result = self.convert_data_types(&result);
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
result = self.strip_postgres_casts(&result);
result = self.convert_nextval(&result);
result = self.convert_default_now(&result);
result = self.strip_schema_prefix(&result);
}
Ok(result.into_bytes())
}
fn convert_drop_table(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let mut result = stmt_str.to_string();
result = self.convert_identifiers(&result);
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
result = self.strip_schema_prefix(&result);
}
Ok(result.into_bytes())
}
fn convert_copy(
&mut self,
stmt: &[u8],
_table_name: Option<&str>,
) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let upper = stmt_str.to_uppercase();
if upper.contains("FROM STDIN") {
if let Some(header) = parse_copy_header(&stmt_str) {
if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
self.pending_copy_header = Some(header);
return Ok(Vec::new());
}
}
}
Ok(stmt.to_vec())
}
fn convert_other(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
let stmt_str = String::from_utf8_lossy(stmt);
let result = stmt_str.to_string();
let trimmed = result.trim();
if self.from == SqlDialect::MySql
&& self.to != SqlDialect::MySql
&& self.is_mysql_session_command(&result)
{
return Ok(Vec::new()); }
if self.from == SqlDialect::Postgres
&& self.to != SqlDialect::Postgres
&& self.is_postgres_session_command(&result)
{
return Ok(Vec::new()); }
if self.from == SqlDialect::Postgres
&& self.to != SqlDialect::Postgres
&& self.is_postgres_only_feature(trimmed)
{
self.warnings.add(ConvertWarning::SkippedStatement {
reason: "PostgreSQL-only feature".to_string(),
statement_preview: trimmed.chars().take(60).collect(),
});
return Ok(Vec::new()); }
if self.from == SqlDialect::Sqlite
&& self.to != SqlDialect::Sqlite
&& self.is_sqlite_pragma(&result)
{
return Ok(Vec::new()); }
if self.from == SqlDialect::Mssql
&& self.to != SqlDialect::Mssql
&& self.is_mssql_session_command(&result)
{
return Ok(Vec::new()); }
if result.contains("/*!") {
let stripped = self.strip_conditional_comments(&result);
return Ok(stripped.into_bytes());
}
Ok(stmt.to_vec())
}
fn is_mysql_session_command(&self, stmt: &str) -> bool {
let upper = stmt.to_uppercase();
upper.contains("SET NAMES")
|| upper.contains("SET CHARACTER")
|| upper.contains("SET SQL_MODE")
|| upper.contains("SET TIME_ZONE")
|| upper.contains("SET FOREIGN_KEY_CHECKS")
|| upper.contains("LOCK TABLES")
|| upper.contains("UNLOCK TABLES")
}
fn is_postgres_session_command(&self, stmt: &str) -> bool {
let upper = stmt.to_uppercase();
upper.contains("SET CLIENT_ENCODING")
|| upper.contains("SET STANDARD_CONFORMING_STRINGS")
|| upper.contains("SET CHECK_FUNCTION_BODIES")
|| upper.contains("SET SEARCH_PATH")
|| upper.contains("SET DEFAULT_TABLESPACE")
|| upper.contains("SET LOCK_TIMEOUT")
|| upper.contains("SET IDLE_IN_TRANSACTION_SESSION_TIMEOUT")
|| upper.contains("SET ROW_SECURITY")
|| upper.contains("SET STATEMENT_TIMEOUT")
|| upper.contains("SET XMLOPTION")
|| upper.contains("SET CLIENT_MIN_MESSAGES")
|| upper.contains("SET DEFAULT_TABLE_ACCESS_METHOD")
|| upper.contains("SELECT PG_CATALOG")
|| upper.contains("OWNER TO")
|| upper.contains("GRANT ")
|| upper.contains("REVOKE ")
}
fn is_postgres_only_feature(&self, stmt: &str) -> bool {
let stripped = self.strip_leading_sql_comments(stmt);
let upper = stripped.to_uppercase();
upper.starts_with("CREATE DOMAIN")
|| upper.starts_with("CREATE TYPE")
|| upper.starts_with("CREATE FUNCTION")
|| upper.starts_with("CREATE PROCEDURE")
|| upper.starts_with("CREATE AGGREGATE")
|| upper.starts_with("CREATE OPERATOR")
|| upper.starts_with("CREATE SEQUENCE")
|| upper.starts_with("CREATE EXTENSION")
|| upper.starts_with("CREATE SCHEMA")
|| upper.starts_with("CREATE TRIGGER")
|| upper.starts_with("ALTER DOMAIN")
|| upper.starts_with("ALTER TYPE")
|| upper.starts_with("ALTER FUNCTION")
|| upper.starts_with("ALTER SEQUENCE")
|| upper.starts_with("ALTER SCHEMA")
|| upper.starts_with("COMMENT ON")
}
fn strip_leading_sql_comments(&self, stmt: &str) -> String {
let mut result = stmt.trim();
loop {
if result.starts_with("--") {
if let Some(pos) = result.find('\n') {
result = result[pos + 1..].trim();
continue;
} else {
return String::new();
}
}
if result.starts_with("/*") {
if let Some(pos) = result.find("*/") {
result = result[pos + 2..].trim();
continue;
} else {
return String::new();
}
}
break;
}
result.to_string()
}
fn is_sqlite_pragma(&self, stmt: &str) -> bool {
let upper = stmt.to_uppercase();
upper.contains("PRAGMA")
}
fn is_mssql_session_command(&self, stmt: &str) -> bool {
let upper = stmt.to_uppercase();
upper.contains("SET ANSI_NULLS")
|| upper.contains("SET QUOTED_IDENTIFIER")
|| upper.contains("SET NOCOUNT")
|| upper.contains("SET XACT_ABORT")
|| upper.contains("SET ARITHABORT")
|| upper.contains("SET ANSI_WARNINGS")
|| upper.contains("SET ANSI_PADDING")
|| upper.contains("SET CONCAT_NULL_YIELDS_NULL")
|| upper.contains("SET NUMERIC_ROUNDABORT")
|| upper.contains("SET IDENTITY_INSERT")
}
fn convert_identifiers(&self, stmt: &str) -> String {
match (self.from, self.to) {
(SqlDialect::MySql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
self.backticks_to_double_quotes(stmt)
}
(SqlDialect::MySql, SqlDialect::Mssql) => {
self.backticks_to_square_brackets(stmt)
}
(SqlDialect::Postgres | SqlDialect::Sqlite, SqlDialect::MySql) => {
self.double_quotes_to_backticks(stmt)
}
(SqlDialect::Postgres | SqlDialect::Sqlite, SqlDialect::Mssql) => {
self.double_quotes_to_square_brackets(stmt)
}
(SqlDialect::Mssql, SqlDialect::MySql) => {
self.square_brackets_to_backticks(stmt)
}
(SqlDialect::Mssql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
self.square_brackets_to_double_quotes(stmt)
}
_ => stmt.to_string(),
}
}
pub fn backticks_to_double_quotes(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut in_string = false;
let mut in_backtick = false;
for c in stmt.chars() {
if c == '\'' && !in_backtick {
in_string = !in_string;
result.push(c);
} else if c == '`' && !in_string {
in_backtick = !in_backtick;
result.push('"');
} else {
result.push(c);
}
}
result
}
pub fn double_quotes_to_backticks(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut in_string = false;
let mut in_dquote = false;
let chars = stmt.chars();
for c in chars {
if c == '\'' && !in_dquote {
in_string = !in_string;
result.push(c);
} else if c == '"' && !in_string {
in_dquote = !in_dquote;
result.push('`');
} else {
result.push(c);
}
}
result
}
pub fn backticks_to_square_brackets(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut in_string = false;
let mut in_backtick = false;
for c in stmt.chars() {
if c == '\'' && !in_backtick {
in_string = !in_string;
result.push(c);
} else if c == '`' && !in_string {
if !in_backtick {
result.push('[');
} else {
result.push(']');
}
in_backtick = !in_backtick;
} else {
result.push(c);
}
}
result
}
pub fn double_quotes_to_square_brackets(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut in_string = false;
let mut in_dquote = false;
for c in stmt.chars() {
if c == '\'' && !in_dquote {
in_string = !in_string;
result.push(c);
} else if c == '"' && !in_string {
if !in_dquote {
result.push('[');
} else {
result.push(']');
}
in_dquote = !in_dquote;
} else {
result.push(c);
}
}
result
}
pub fn square_brackets_to_backticks(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut in_string = false;
for c in stmt.chars() {
if c == '\'' {
in_string = !in_string;
result.push(c);
} else if !in_string && (c == '[' || c == ']') {
result.push('`');
} else {
result.push(c);
}
}
result
}
pub fn square_brackets_to_double_quotes(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut in_string = false;
for c in stmt.chars() {
if c == '\'' {
in_string = !in_string;
result.push(c);
} else if !in_string && (c == '[' || c == ']') {
result.push('"');
} else {
result.push(c);
}
}
result
}
fn convert_data_types(&self, stmt: &str) -> String {
TypeMapper::convert(stmt, self.from, self.to)
}
fn convert_auto_increment(&self, stmt: &str, _table_name: Option<&str>) -> String {
match (self.from, self.to) {
(SqlDialect::MySql, SqlDialect::Postgres) => {
let result = stmt.replace("BIGINT AUTO_INCREMENT", "BIGSERIAL");
let result = result.replace("bigint AUTO_INCREMENT", "BIGSERIAL");
let result = result.replace("INT AUTO_INCREMENT", "SERIAL");
let result = result.replace("int AUTO_INCREMENT", "SERIAL");
result.replace("AUTO_INCREMENT", "") }
(SqlDialect::MySql, SqlDialect::Sqlite) => {
let result = stmt.replace("INT AUTO_INCREMENT", "INTEGER");
let result = result.replace("int AUTO_INCREMENT", "INTEGER");
result.replace("AUTO_INCREMENT", "")
}
(SqlDialect::Postgres, SqlDialect::MySql) => {
let result = stmt.replace("BIGSERIAL", "BIGINT AUTO_INCREMENT");
let result = result.replace("bigserial", "BIGINT AUTO_INCREMENT");
let result = result.replace("SMALLSERIAL", "SMALLINT AUTO_INCREMENT");
let result = result.replace("smallserial", "SMALLINT AUTO_INCREMENT");
let result = result.replace("SERIAL", "INT AUTO_INCREMENT");
result.replace("serial", "INT AUTO_INCREMENT")
}
(SqlDialect::Postgres, SqlDialect::Sqlite) => {
let result = stmt.replace("BIGSERIAL", "INTEGER");
let result = result.replace("bigserial", "INTEGER");
let result = result.replace("SMALLSERIAL", "INTEGER");
let result = result.replace("smallserial", "INTEGER");
let result = result.replace("SERIAL", "INTEGER");
result.replace("serial", "INTEGER")
}
(SqlDialect::Sqlite, SqlDialect::MySql) => {
stmt.to_string()
}
(SqlDialect::Sqlite, SqlDialect::Postgres) => {
stmt.to_string()
}
(SqlDialect::MySql, SqlDialect::Mssql) => {
let result = stmt.replace("BIGINT AUTO_INCREMENT", "BIGINT IDENTITY(1,1)");
let result = result.replace("bigint AUTO_INCREMENT", "BIGINT IDENTITY(1,1)");
let result = result.replace("INT AUTO_INCREMENT", "INT IDENTITY(1,1)");
let result = result.replace("int AUTO_INCREMENT", "INT IDENTITY(1,1)");
result.replace("AUTO_INCREMENT", "IDENTITY(1,1)")
}
(SqlDialect::Mssql, SqlDialect::MySql) => {
self.convert_identity_to_auto_increment(stmt)
}
(SqlDialect::Postgres, SqlDialect::Mssql) => {
stmt.to_string()
}
(SqlDialect::Mssql, SqlDialect::Postgres) => {
self.convert_identity_to_serial(stmt)
}
(SqlDialect::Sqlite, SqlDialect::Mssql) => {
stmt.to_string()
}
(SqlDialect::Mssql, SqlDialect::Sqlite) => {
self.strip_identity(stmt)
}
_ => stmt.to_string(),
}
}
fn convert_identity_to_auto_increment(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_IDENTITY: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bIDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
RE_IDENTITY.replace_all(stmt, "AUTO_INCREMENT").to_string()
}
fn convert_identity_to_serial(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_BIGINT_IDENTITY: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bBIGINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
static RE_INT_IDENTITY: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
static RE_SMALLINT_IDENTITY: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)\bSMALLINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap()
});
let result = RE_BIGINT_IDENTITY
.replace_all(stmt, "BIGSERIAL")
.to_string();
let result = RE_INT_IDENTITY.replace_all(&result, "SERIAL").to_string();
RE_SMALLINT_IDENTITY
.replace_all(&result, "SMALLSERIAL")
.to_string()
}
fn strip_identity(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_IDENTITY: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\s*IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
RE_IDENTITY.replace_all(stmt, "").to_string()
}
fn convert_string_escapes(&self, stmt: &str) -> String {
match (self.from, self.to) {
(SqlDialect::MySql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
self.mysql_escapes_to_standard(stmt)
}
_ => stmt.to_string(),
}
}
fn mysql_escapes_to_standard(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut chars = stmt.chars().peekable();
let mut in_string = false;
while let Some(c) = chars.next() {
if c == '\'' {
in_string = !in_string;
result.push(c);
} else if c == '\\' && in_string {
if let Some(&next) = chars.peek() {
match next {
'\'' => {
chars.next();
result.push_str("''");
}
'\\' => {
chars.next();
result.push_str("\\\\");
}
'n' | 'r' | 't' | '0' => {
result.push(c);
}
_ => {
result.push(c);
}
}
} else {
result.push(c);
}
} else {
result.push(c);
}
}
result
}
fn strip_conditional_comments(&self, stmt: &str) -> String {
let mut result = String::with_capacity(stmt.len());
let mut chars = stmt.chars().peekable();
while let Some(c) = chars.next() {
if c == '/' && chars.peek() == Some(&'*') {
chars.next(); if chars.peek() == Some(&'!') {
chars.next(); while chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
chars.next();
}
let mut depth = 1;
while depth > 0 {
match chars.next() {
Some('*') if chars.peek() == Some(&'/') => {
chars.next();
depth -= 1;
}
Some('/') if chars.peek() == Some(&'*') => {
chars.next();
depth += 1;
}
None => break,
_ => {}
}
}
} else {
result.push('/');
result.push('*');
}
} else {
result.push(c);
}
}
result
}
fn strip_engine_clause(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
if self.to == SqlDialect::MySql {
return stmt.to_string();
}
static RE_ENGINE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\s*ENGINE\s*=\s*\w+").unwrap());
RE_ENGINE.replace_all(stmt, "").to_string()
}
fn strip_charset_clauses(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
if self.to == SqlDialect::MySql {
return stmt.to_string();
}
static RE_CHARSET: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\s*(DEFAULT\s+)?CHARSET\s*=\s*\w+").unwrap());
static RE_COLLATE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\s*COLLATE\s*=?\s*\w+").unwrap());
let result = RE_CHARSET.replace_all(stmt, "").to_string();
RE_COLLATE.replace_all(&result, "").to_string()
}
fn strip_postgres_casts(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_CAST: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"::[a-zA-Z_][a-zA-Z0-9_]*(?:\s+[a-zA-Z_][a-zA-Z0-9_]*)*").unwrap()
});
RE_CAST.replace_all(stmt, "").to_string()
}
fn convert_nextval(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_NEXTVAL: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\s*DEFAULT\s+nextval\s*\([^)]+\)").unwrap());
RE_NEXTVAL.replace_all(stmt, "").to_string()
}
fn convert_default_now(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_NOW: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bDEFAULT\s+now\s*\(\s*\)").unwrap());
RE_NOW
.replace_all(stmt, "DEFAULT CURRENT_TIMESTAMP")
.to_string()
}
fn strip_schema_prefix(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_SCHEMA: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?i)\b(public|pg_catalog|pg_temp)\s*\.\s*"#).unwrap());
RE_SCHEMA.replace_all(stmt, "").to_string()
}
fn convert_mssql_getdate(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_GETDATE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bGETDATE\s*\(\s*\)").unwrap());
static RE_SYSDATETIME: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bSYSDATETIME\s*\(\s*\)").unwrap());
static RE_GETUTCDATE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bGETUTCDATE\s*\(\s*\)").unwrap());
let result = RE_GETDATE
.replace_all(stmt, "CURRENT_TIMESTAMP")
.to_string();
let result = RE_SYSDATETIME
.replace_all(&result, "CURRENT_TIMESTAMP")
.to_string();
RE_GETUTCDATE
.replace_all(&result, "CURRENT_TIMESTAMP")
.to_string()
}
fn strip_mssql_on_filegroup(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_ON_FILEGROUP: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\s+ON\s*\[\s*\w+\s*\]").unwrap());
RE_ON_FILEGROUP.replace_all(stmt, "").to_string()
}
fn strip_mssql_clustered(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_CLUSTERED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bCLUSTERED\s+").unwrap());
static RE_NONCLUSTERED: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\bNONCLUSTERED\s+").unwrap());
let result = RE_CLUSTERED.replace_all(stmt, "").to_string();
RE_NONCLUSTERED.replace_all(&result, "").to_string()
}
fn convert_mssql_unicode_strings(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_UNICODE_STRING: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bN'").unwrap());
RE_UNICODE_STRING.replace_all(stmt, "'").to_string()
}
fn strip_mssql_schema_prefix(&self, stmt: &str) -> String {
use once_cell::sync::Lazy;
use regex::Regex;
static RE_MSSQL_SCHEMA: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\[?dbo\]?\s*\.\s*").unwrap());
RE_MSSQL_SCHEMA.replace_all(stmt, "").to_string()
}
fn detect_unsupported_features(
&mut self,
stmt: &str,
table_name: Option<&str>,
) -> Result<(), ConvertWarning> {
let upper = stmt.to_uppercase();
if self.from == SqlDialect::MySql {
if upper.contains("ENUM(") {
let warning = ConvertWarning::UnsupportedFeature {
feature: format!(
"ENUM type{}",
table_name
.map(|t| format!(" in table {}", t))
.unwrap_or_default()
),
suggestion: Some(
"Converted to VARCHAR - consider adding CHECK constraint".to_string(),
),
};
self.warnings.add(warning.clone());
if self.strict {
return Err(warning);
}
}
if upper.contains("SET(") {
let warning = ConvertWarning::UnsupportedFeature {
feature: format!(
"SET type{}",
table_name
.map(|t| format!(" in table {}", t))
.unwrap_or_default()
),
suggestion: Some(
"Converted to VARCHAR - SET semantics not preserved".to_string(),
),
};
self.warnings.add(warning.clone());
if self.strict {
return Err(warning);
}
}
if upper.contains("UNSIGNED") {
self.warnings.add(ConvertWarning::UnsupportedFeature {
feature: "UNSIGNED modifier".to_string(),
suggestion: Some(
"Removed - consider adding CHECK constraint for non-negative values"
.to_string(),
),
});
}
}
if self.from == SqlDialect::Postgres {
if upper.contains("[]") || upper.contains("ARRAY[") {
let warning = ConvertWarning::UnsupportedFeature {
feature: format!(
"Array type{}",
table_name
.map(|t| format!(" in table {}", t))
.unwrap_or_default()
),
suggestion: Some(
"Array types not supported in target dialect - consider using JSON"
.to_string(),
),
};
self.warnings.add(warning.clone());
if self.strict {
return Err(warning);
}
}
if upper.contains("INHERITS") {
let warning = ConvertWarning::UnsupportedFeature {
feature: "Table inheritance (INHERITS)".to_string(),
suggestion: Some(
"PostgreSQL table inheritance not supported in target dialect".to_string(),
),
};
self.warnings.add(warning.clone());
if self.strict {
return Err(warning);
}
}
if upper.contains("PARTITION BY") && self.to == SqlDialect::Sqlite {
let warning = ConvertWarning::UnsupportedFeature {
feature: "Table partitioning".to_string(),
suggestion: Some("Partitioning not supported in SQLite".to_string()),
};
self.warnings.add(warning.clone());
if self.strict {
return Err(warning);
}
}
}
Ok(())
}
pub fn warnings(&self) -> &[ConvertWarning] {
self.warnings.warnings()
}
}
pub fn run(config: ConvertConfig) -> anyhow::Result<ConvertStats> {
let mut stats = ConvertStats::default();
let from_dialect = if let Some(d) = config.from_dialect {
d
} else {
let result = crate::parser::detect_dialect_from_file(&config.input)?;
if config.progress {
eprintln!(
"Auto-detected source dialect: {} (confidence: {:?})",
result.dialect, result.confidence
);
}
result.dialect
};
if from_dialect == config.to_dialect {
anyhow::bail!(
"Source and target dialects are the same ({}). No conversion needed.",
from_dialect
);
}
let file_size = std::fs::metadata(&config.input)?.len();
let progress_bar = if config.progress {
let pb = ProgressBar::new(file_size);
pb.set_style(
ProgressStyle::with_template(
"{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({percent}%) {msg}",
)
.unwrap()
.progress_chars("█▓▒░ ")
.tick_chars("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"),
);
pb.enable_steady_tick(std::time::Duration::from_millis(100));
pb.set_message("Converting...");
Some(pb)
} else {
None
};
let mut converter = Converter::new(from_dialect, config.to_dialect).with_strict(config.strict);
let file = File::open(&config.input)?;
let compression = Compression::from_path(&config.input);
let reader: Box<dyn Read> = if let Some(ref pb) = progress_bar {
let pb_clone = pb.clone();
let progress_reader = ProgressReader::new(file, move |bytes| {
pb_clone.set_position(bytes);
});
compression.wrap_reader(Box::new(progress_reader))?
} else {
compression.wrap_reader(Box::new(file))?
};
let mut parser = Parser::with_dialect(reader, 64 * 1024, from_dialect);
let mut writer: Box<dyn Write> = if config.dry_run {
Box::new(std::io::sink())
} else {
match &config.output {
Some(path) => {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
Box::new(BufWriter::with_capacity(256 * 1024, File::create(path)?))
}
None => Box::new(BufWriter::new(std::io::stdout())),
}
};
if !config.dry_run {
write_header(&mut writer, &config, from_dialect)?;
}
while let Some(stmt) = parser.read_statement()? {
stats.statements_processed += 1;
if converter.has_pending_copy() {
match converter.process_copy_data(&stmt) {
Ok(inserts) => {
for insert in inserts {
if !insert.is_empty() {
stats.statements_converted += 1;
if !config.dry_run {
writer.write_all(&insert)?;
writer.write_all(b"\n")?;
}
}
}
}
Err(warning) => {
stats.warnings.push(warning);
stats.statements_skipped += 1;
}
}
continue;
}
match converter.convert_statement(&stmt) {
Ok(converted) => {
if converted.is_empty() {
stats.statements_skipped += 1;
} else if converted == stmt {
stats.statements_unchanged += 1;
if !config.dry_run {
writer.write_all(&converted)?;
writer.write_all(b"\n")?;
}
} else {
stats.statements_converted += 1;
if !config.dry_run {
writer.write_all(&converted)?;
writer.write_all(b"\n")?;
}
}
}
Err(warning) => {
stats.warnings.push(warning);
stats.statements_skipped += 1;
}
}
}
stats.warnings.extend(converter.warnings().iter().cloned());
if let Some(pb) = progress_bar {
pb.finish_with_message("done");
}
Ok(stats)
}
fn write_header(
writer: &mut dyn Write,
config: &ConvertConfig,
from: SqlDialect,
) -> std::io::Result<()> {
writeln!(writer, "-- Converted by sql-splitter")?;
writeln!(writer, "-- From: {} → To: {}", from, config.to_dialect)?;
writeln!(writer, "-- Source: {}", config.input.display())?;
writeln!(writer)?;
match config.to_dialect {
SqlDialect::Postgres => {
writeln!(writer, "SET client_encoding = 'UTF8';")?;
writeln!(writer, "SET standard_conforming_strings = on;")?;
}
SqlDialect::Sqlite => {
writeln!(writer, "PRAGMA foreign_keys = OFF;")?;
}
SqlDialect::MySql => {
writeln!(writer, "SET NAMES utf8mb4;")?;
writeln!(writer, "SET FOREIGN_KEY_CHECKS = 0;")?;
}
SqlDialect::Mssql => {
writeln!(writer, "SET ANSI_NULLS ON;")?;
writeln!(writer, "SET QUOTED_IDENTIFIER ON;")?;
writeln!(writer, "SET NOCOUNT ON;")?;
}
}
writeln!(writer)?;
Ok(())
}