1pub mod copy_to_insert;
12mod types;
13mod warnings;
14
15#[allow(unused_imports)]
16pub use copy_to_insert::{
17 copy_to_inserts, parse_copy_data, parse_copy_header, CopyHeader, CopyValue,
18};
19
20use crate::parser::{Parser, SqlDialect, StatementType};
21use crate::progress::ProgressReader;
22use crate::splitter::Compression;
23use indicatif::{ProgressBar, ProgressStyle};
24use std::fs::File;
25use std::io::{BufWriter, Read, Write};
26use std::path::PathBuf;
27
28pub use types::TypeMapper;
29pub use warnings::{print_warnings_summary, ConvertWarning, WarningCollector};
30
31#[derive(Debug)]
33pub struct ConvertConfig {
34 pub input: PathBuf,
36 pub output: Option<PathBuf>,
38 pub from_dialect: Option<SqlDialect>,
40 pub to_dialect: SqlDialect,
42 pub dry_run: bool,
44 pub progress: bool,
46 pub strict: bool,
48}
49
50impl Default for ConvertConfig {
51 fn default() -> Self {
52 Self {
53 input: PathBuf::new(),
54 output: None,
55 from_dialect: None,
56 to_dialect: SqlDialect::Postgres,
57 dry_run: false,
58 progress: false,
59 strict: false,
60 }
61 }
62}
63
64#[derive(Debug, Default, serde::Serialize)]
66pub struct ConvertStats {
67 pub statements_processed: u64,
69 pub statements_converted: u64,
71 pub statements_unchanged: u64,
73 pub statements_skipped: u64,
75 pub warnings: Vec<ConvertWarning>,
77}
78
79pub struct Converter {
81 from: SqlDialect,
82 to: SqlDialect,
83 warnings: WarningCollector,
84 strict: bool,
85 pending_copy_header: Option<CopyHeader>,
87}
88
89impl Converter {
90 pub fn new(from: SqlDialect, to: SqlDialect) -> Self {
91 Self {
92 from,
93 to,
94 warnings: WarningCollector::new(),
95 strict: false,
96 pending_copy_header: None,
97 }
98 }
99
100 pub fn with_strict(mut self, strict: bool) -> Self {
101 self.strict = strict;
102 self
103 }
104
105 pub fn has_pending_copy(&self) -> bool {
107 self.pending_copy_header.is_some()
108 }
109
110 pub fn process_copy_data(&mut self, data: &[u8]) -> Result<Vec<Vec<u8>>, ConvertWarning> {
112 if let Some(header) = self.pending_copy_header.take() {
113 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
114 let inserts = copy_to_inserts(&header, data, self.to);
116 return Ok(inserts);
117 }
118 }
119 Ok(vec![data.to_vec()])
121 }
122
123 pub fn convert_statement(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
125 let (stmt_type, table_name) =
126 Parser::<&[u8]>::parse_statement_with_dialect(stmt, self.from);
127
128 let table = if table_name.is_empty() {
129 None
130 } else {
131 Some(table_name.as_str())
132 };
133
134 match stmt_type {
135 StatementType::CreateTable => self.convert_create_table(stmt, table),
136 StatementType::Insert => self.convert_insert(stmt, table),
137 StatementType::CreateIndex => self.convert_create_index(stmt),
138 StatementType::AlterTable => self.convert_alter_table(stmt),
139 StatementType::DropTable => self.convert_drop_table(stmt),
140 StatementType::Copy => self.convert_copy(stmt, table),
141 StatementType::Unknown => self.convert_other(stmt),
142 }
143 }
144
145 fn convert_create_table(
147 &mut self,
148 stmt: &[u8],
149 table_name: Option<&str>,
150 ) -> Result<Vec<u8>, ConvertWarning> {
151 let stmt_str = String::from_utf8_lossy(stmt);
152 let mut result = stmt_str.to_string();
153
154 self.detect_unsupported_features(&result, table_name)?;
156
157 if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
160 result = self.strip_mssql_schema_prefix(&result);
161 result = self.convert_mssql_getdate(&result);
162 result = self.strip_mssql_on_filegroup(&result);
163 result = self.strip_mssql_clustered(&result);
164 result = self.convert_mssql_unicode_strings(&result);
165 }
166
167 result = self.convert_identifiers(&result);
169
170 result = self.convert_data_types(&result);
172
173 result = self.convert_auto_increment(&result, table_name);
175
176 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
178 result = self.strip_postgres_casts(&result);
179 result = self.convert_nextval(&result);
180 result = self.convert_default_now(&result);
181 result = self.strip_schema_prefix(&result);
182 }
183
184 result = self.convert_string_escapes(&result);
186
187 result = self.strip_conditional_comments(&result);
189
190 result = self.strip_engine_clause(&result);
192
193 result = self.strip_charset_clauses(&result);
195
196 Ok(result.into_bytes())
197 }
198
199 fn convert_insert(
201 &mut self,
202 stmt: &[u8],
203 _table_name: Option<&str>,
204 ) -> Result<Vec<u8>, ConvertWarning> {
205 let stmt_str = String::from_utf8_lossy(stmt);
206 let mut result = stmt_str.to_string();
207
208 if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
210 result = self.strip_mssql_schema_prefix(&result);
211 result = self.convert_mssql_unicode_strings(&result);
212 }
213
214 result = self.convert_identifiers(&result);
216
217 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
219 result = self.strip_postgres_casts(&result);
220 result = self.strip_schema_prefix(&result);
221 }
222
223 result = self.convert_string_escapes(&result);
225
226 Ok(result.into_bytes())
227 }
228
229 fn convert_create_index(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
231 let stmt_str = String::from_utf8_lossy(stmt);
232 let mut result = stmt_str.to_string();
233
234 if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
236 result = self.strip_mssql_schema_prefix(&result);
237 result = self.strip_mssql_clustered(&result);
238 }
239
240 result = self.convert_identifiers(&result);
242
243 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
245 result = self.strip_postgres_casts(&result);
246 result = self.strip_schema_prefix(&result);
247 }
248
249 if result.contains("FULLTEXT") || result.contains("fulltext") {
251 self.warnings.add(ConvertWarning::UnsupportedFeature {
252 feature: "FULLTEXT INDEX".to_string(),
253 suggestion: Some("Use PostgreSQL GIN index or skip".to_string()),
254 });
255 if self.strict {
256 return Err(ConvertWarning::UnsupportedFeature {
257 feature: "FULLTEXT INDEX".to_string(),
258 suggestion: None,
259 });
260 }
261 }
262
263 Ok(result.into_bytes())
264 }
265
266 fn convert_alter_table(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
268 let stmt_str = String::from_utf8_lossy(stmt);
269 let mut result = stmt_str.to_string();
270
271 result = self.convert_identifiers(&result);
272 result = self.convert_data_types(&result);
273
274 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
276 result = self.strip_postgres_casts(&result);
277 result = self.convert_nextval(&result);
278 result = self.convert_default_now(&result);
279 result = self.strip_schema_prefix(&result);
280 }
281
282 Ok(result.into_bytes())
283 }
284
285 fn convert_drop_table(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
287 let stmt_str = String::from_utf8_lossy(stmt);
288 let mut result = stmt_str.to_string();
289
290 result = self.convert_identifiers(&result);
291
292 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
294 result = self.strip_schema_prefix(&result);
295 }
296
297 Ok(result.into_bytes())
298 }
299
300 fn convert_copy(
305 &mut self,
306 stmt: &[u8],
307 _table_name: Option<&str>,
308 ) -> Result<Vec<u8>, ConvertWarning> {
309 let stmt_str = String::from_utf8_lossy(stmt);
310
311 let upper = stmt_str.to_uppercase();
313 if upper.contains("FROM STDIN") {
314 if let Some(header) = parse_copy_header(&stmt_str) {
316 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
317 self.pending_copy_header = Some(header);
319 return Ok(Vec::new());
321 }
322 }
323 }
324
325 Ok(stmt.to_vec())
327 }
328
329 fn convert_other(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
331 let stmt_str = String::from_utf8_lossy(stmt);
332 let result = stmt_str.to_string();
333 let trimmed = result.trim();
334
335 if self.from == SqlDialect::MySql
337 && self.to != SqlDialect::MySql
338 && self.is_mysql_session_command(&result)
339 {
340 return Ok(Vec::new()); }
342
343 if self.from == SqlDialect::Postgres
345 && self.to != SqlDialect::Postgres
346 && self.is_postgres_session_command(&result)
347 {
348 return Ok(Vec::new()); }
350 if self.from == SqlDialect::Postgres
351 && self.to != SqlDialect::Postgres
352 && self.is_postgres_only_feature(trimmed)
353 {
354 self.warnings.add(ConvertWarning::SkippedStatement {
355 reason: "PostgreSQL-only feature".to_string(),
356 statement_preview: trimmed.chars().take(60).collect(),
357 });
358 return Ok(Vec::new()); }
360
361 if self.from == SqlDialect::Sqlite
363 && self.to != SqlDialect::Sqlite
364 && self.is_sqlite_pragma(&result)
365 {
366 return Ok(Vec::new()); }
368
369 if self.from == SqlDialect::Mssql
371 && self.to != SqlDialect::Mssql
372 && self.is_mssql_session_command(&result)
373 {
374 return Ok(Vec::new()); }
376
377 if result.contains("/*!") {
379 let stripped = self.strip_conditional_comments(&result);
380 return Ok(stripped.into_bytes());
381 }
382
383 Ok(stmt.to_vec())
384 }
385
386 fn is_mysql_session_command(&self, stmt: &str) -> bool {
388 let upper = stmt.to_uppercase();
389 upper.contains("SET NAMES")
390 || upper.contains("SET CHARACTER")
391 || upper.contains("SET SQL_MODE")
392 || upper.contains("SET TIME_ZONE")
393 || upper.contains("SET FOREIGN_KEY_CHECKS")
394 || upper.contains("LOCK TABLES")
395 || upper.contains("UNLOCK TABLES")
396 }
397
398 fn is_postgres_session_command(&self, stmt: &str) -> bool {
400 let upper = stmt.to_uppercase();
401 upper.contains("SET CLIENT_ENCODING")
403 || upper.contains("SET STANDARD_CONFORMING_STRINGS")
404 || upper.contains("SET CHECK_FUNCTION_BODIES")
405 || upper.contains("SET SEARCH_PATH")
406 || upper.contains("SET DEFAULT_TABLESPACE")
407 || upper.contains("SET LOCK_TIMEOUT")
408 || upper.contains("SET IDLE_IN_TRANSACTION_SESSION_TIMEOUT")
409 || upper.contains("SET ROW_SECURITY")
410 || upper.contains("SET STATEMENT_TIMEOUT")
411 || upper.contains("SET XMLOPTION")
412 || upper.contains("SET CLIENT_MIN_MESSAGES")
413 || upper.contains("SET DEFAULT_TABLE_ACCESS_METHOD")
414 || upper.contains("SELECT PG_CATALOG")
415 || upper.contains("OWNER TO")
417 || upper.contains("GRANT ")
418 || upper.contains("REVOKE ")
419 }
420
421 fn is_postgres_only_feature(&self, stmt: &str) -> bool {
423 let stripped = self.strip_leading_sql_comments(stmt);
425 let upper = stripped.to_uppercase();
426
427 upper.starts_with("CREATE DOMAIN")
429 || upper.starts_with("CREATE TYPE")
430 || upper.starts_with("CREATE FUNCTION")
431 || upper.starts_with("CREATE PROCEDURE")
432 || upper.starts_with("CREATE AGGREGATE")
433 || upper.starts_with("CREATE OPERATOR")
434 || upper.starts_with("CREATE SEQUENCE")
435 || upper.starts_with("CREATE EXTENSION")
436 || upper.starts_with("CREATE SCHEMA")
437 || upper.starts_with("CREATE TRIGGER")
438 || upper.starts_with("ALTER DOMAIN")
439 || upper.starts_with("ALTER TYPE")
440 || upper.starts_with("ALTER FUNCTION")
441 || upper.starts_with("ALTER SEQUENCE")
442 || upper.starts_with("ALTER SCHEMA")
443 || upper.starts_with("COMMENT ON")
444 }
445
446 fn strip_leading_sql_comments(&self, stmt: &str) -> String {
448 let mut result = stmt.trim();
449 loop {
450 if result.starts_with("--") {
452 if let Some(pos) = result.find('\n') {
453 result = result[pos + 1..].trim();
454 continue;
455 } else {
456 return String::new();
457 }
458 }
459 if result.starts_with("/*") {
461 if let Some(pos) = result.find("*/") {
462 result = result[pos + 2..].trim();
463 continue;
464 } else {
465 return String::new();
466 }
467 }
468 break;
469 }
470 result.to_string()
471 }
472
473 fn is_sqlite_pragma(&self, stmt: &str) -> bool {
475 let upper = stmt.to_uppercase();
476 upper.contains("PRAGMA")
477 }
478
479 fn is_mssql_session_command(&self, stmt: &str) -> bool {
481 let upper = stmt.to_uppercase();
482 upper.contains("SET ANSI_NULLS")
483 || upper.contains("SET QUOTED_IDENTIFIER")
484 || upper.contains("SET NOCOUNT")
485 || upper.contains("SET XACT_ABORT")
486 || upper.contains("SET ARITHABORT")
487 || upper.contains("SET ANSI_WARNINGS")
488 || upper.contains("SET ANSI_PADDING")
489 || upper.contains("SET CONCAT_NULL_YIELDS_NULL")
490 || upper.contains("SET NUMERIC_ROUNDABORT")
491 || upper.contains("SET IDENTITY_INSERT")
492 }
493
494 fn convert_identifiers(&self, stmt: &str) -> String {
496 match (self.from, self.to) {
497 (SqlDialect::MySql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
498 self.backticks_to_double_quotes(stmt)
500 }
501 (SqlDialect::MySql, SqlDialect::Mssql) => {
502 self.backticks_to_square_brackets(stmt)
504 }
505 (SqlDialect::Postgres | SqlDialect::Sqlite, SqlDialect::MySql) => {
506 self.double_quotes_to_backticks(stmt)
508 }
509 (SqlDialect::Postgres | SqlDialect::Sqlite, SqlDialect::Mssql) => {
510 self.double_quotes_to_square_brackets(stmt)
512 }
513 (SqlDialect::Mssql, SqlDialect::MySql) => {
514 self.square_brackets_to_backticks(stmt)
516 }
517 (SqlDialect::Mssql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
518 self.square_brackets_to_double_quotes(stmt)
520 }
521 _ => stmt.to_string(),
522 }
523 }
524
525 pub fn backticks_to_double_quotes(&self, stmt: &str) -> String {
527 let mut result = String::with_capacity(stmt.len());
528 let mut in_string = false;
529 let mut in_backtick = false;
530
531 for c in stmt.chars() {
532 if c == '\'' && !in_backtick {
533 in_string = !in_string;
534 result.push(c);
535 } else if c == '`' && !in_string {
536 in_backtick = !in_backtick;
537 result.push('"');
538 } else {
539 result.push(c);
540 }
541 }
542 result
543 }
544
545 pub fn double_quotes_to_backticks(&self, stmt: &str) -> String {
547 let mut result = String::with_capacity(stmt.len());
548 let mut in_string = false;
549 let mut in_dquote = false;
550 let chars = stmt.chars();
551
552 for c in chars {
553 if c == '\'' && !in_dquote {
554 in_string = !in_string;
555 result.push(c);
556 } else if c == '"' && !in_string {
557 in_dquote = !in_dquote;
558 result.push('`');
559 } else {
560 result.push(c);
561 }
562 }
563 result
564 }
565
566 pub fn backticks_to_square_brackets(&self, stmt: &str) -> String {
568 let mut result = String::with_capacity(stmt.len());
569 let mut in_string = false;
570 let mut in_backtick = false;
571
572 for c in stmt.chars() {
573 if c == '\'' && !in_backtick {
574 in_string = !in_string;
575 result.push(c);
576 } else if c == '`' && !in_string {
577 if !in_backtick {
578 result.push('[');
579 } else {
580 result.push(']');
581 }
582 in_backtick = !in_backtick;
583 } else {
584 result.push(c);
585 }
586 }
587 result
588 }
589
590 pub fn double_quotes_to_square_brackets(&self, stmt: &str) -> String {
592 let mut result = String::with_capacity(stmt.len());
593 let mut in_string = false;
594 let mut in_dquote = false;
595
596 for c in stmt.chars() {
597 if c == '\'' && !in_dquote {
598 in_string = !in_string;
599 result.push(c);
600 } else if c == '"' && !in_string {
601 if !in_dquote {
602 result.push('[');
603 } else {
604 result.push(']');
605 }
606 in_dquote = !in_dquote;
607 } else {
608 result.push(c);
609 }
610 }
611 result
612 }
613
614 pub fn square_brackets_to_backticks(&self, stmt: &str) -> String {
616 let mut result = String::with_capacity(stmt.len());
617 let mut in_string = false;
618
619 for c in stmt.chars() {
620 if c == '\'' {
621 in_string = !in_string;
622 result.push(c);
623 } else if !in_string && (c == '[' || c == ']') {
624 result.push('`');
625 } else {
626 result.push(c);
627 }
628 }
629 result
630 }
631
632 pub fn square_brackets_to_double_quotes(&self, stmt: &str) -> String {
634 let mut result = String::with_capacity(stmt.len());
635 let mut in_string = false;
636
637 for c in stmt.chars() {
638 if c == '\'' {
639 in_string = !in_string;
640 result.push(c);
641 } else if !in_string && (c == '[' || c == ']') {
642 result.push('"');
643 } else {
644 result.push(c);
645 }
646 }
647 result
648 }
649
650 fn convert_data_types(&self, stmt: &str) -> String {
652 TypeMapper::convert(stmt, self.from, self.to)
653 }
654
655 fn convert_auto_increment(&self, stmt: &str, _table_name: Option<&str>) -> String {
657 match (self.from, self.to) {
658 (SqlDialect::MySql, SqlDialect::Postgres) => {
659 let result = stmt.replace("BIGINT AUTO_INCREMENT", "BIGSERIAL");
662 let result = result.replace("bigint AUTO_INCREMENT", "BIGSERIAL");
663 let result = result.replace("INT AUTO_INCREMENT", "SERIAL");
664 let result = result.replace("int AUTO_INCREMENT", "SERIAL");
665 result.replace("AUTO_INCREMENT", "") }
667 (SqlDialect::MySql, SqlDialect::Sqlite) => {
668 let result = stmt.replace("INT AUTO_INCREMENT", "INTEGER");
671 let result = result.replace("int AUTO_INCREMENT", "INTEGER");
672 result.replace("AUTO_INCREMENT", "")
673 }
674 (SqlDialect::Postgres, SqlDialect::MySql) => {
675 let result = stmt.replace("BIGSERIAL", "BIGINT AUTO_INCREMENT");
678 let result = result.replace("bigserial", "BIGINT AUTO_INCREMENT");
679 let result = result.replace("SMALLSERIAL", "SMALLINT AUTO_INCREMENT");
680 let result = result.replace("smallserial", "SMALLINT AUTO_INCREMENT");
681 let result = result.replace("SERIAL", "INT AUTO_INCREMENT");
682 result.replace("serial", "INT AUTO_INCREMENT")
683 }
684 (SqlDialect::Postgres, SqlDialect::Sqlite) => {
685 let result = stmt.replace("BIGSERIAL", "INTEGER");
687 let result = result.replace("bigserial", "INTEGER");
688 let result = result.replace("SMALLSERIAL", "INTEGER");
689 let result = result.replace("smallserial", "INTEGER");
690 let result = result.replace("SERIAL", "INTEGER");
691 result.replace("serial", "INTEGER")
692 }
693 (SqlDialect::Sqlite, SqlDialect::MySql) => {
694 stmt.to_string()
697 }
698 (SqlDialect::Sqlite, SqlDialect::Postgres) => {
699 stmt.to_string()
702 }
703 (SqlDialect::MySql, SqlDialect::Mssql) => {
705 let result = stmt.replace("BIGINT AUTO_INCREMENT", "BIGINT IDENTITY(1,1)");
707 let result = result.replace("bigint AUTO_INCREMENT", "BIGINT IDENTITY(1,1)");
708 let result = result.replace("INT AUTO_INCREMENT", "INT IDENTITY(1,1)");
709 let result = result.replace("int AUTO_INCREMENT", "INT IDENTITY(1,1)");
710 result.replace("AUTO_INCREMENT", "IDENTITY(1,1)")
711 }
712 (SqlDialect::Mssql, SqlDialect::MySql) => {
713 self.convert_identity_to_auto_increment(stmt)
715 }
716 (SqlDialect::Postgres, SqlDialect::Mssql) => {
717 stmt.to_string()
719 }
720 (SqlDialect::Mssql, SqlDialect::Postgres) => {
721 self.convert_identity_to_serial(stmt)
723 }
724 (SqlDialect::Sqlite, SqlDialect::Mssql) => {
725 stmt.to_string()
727 }
728 (SqlDialect::Mssql, SqlDialect::Sqlite) => {
729 self.strip_identity(stmt)
731 }
732 _ => stmt.to_string(),
733 }
734 }
735
736 fn convert_identity_to_auto_increment(&self, stmt: &str) -> String {
738 use once_cell::sync::Lazy;
739 use regex::Regex;
740
741 static RE_IDENTITY: Lazy<Regex> =
742 Lazy::new(|| Regex::new(r"(?i)\bIDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
743
744 RE_IDENTITY.replace_all(stmt, "AUTO_INCREMENT").to_string()
745 }
746
747 fn convert_identity_to_serial(&self, stmt: &str) -> String {
749 use once_cell::sync::Lazy;
750 use regex::Regex;
751
752 static RE_BIGINT_IDENTITY: Lazy<Regex> =
754 Lazy::new(|| Regex::new(r"(?i)\bBIGINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
755 static RE_INT_IDENTITY: Lazy<Regex> =
756 Lazy::new(|| Regex::new(r"(?i)\bINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
757 static RE_SMALLINT_IDENTITY: Lazy<Regex> = Lazy::new(|| {
758 Regex::new(r"(?i)\bSMALLINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap()
759 });
760
761 let result = RE_BIGINT_IDENTITY
762 .replace_all(stmt, "BIGSERIAL")
763 .to_string();
764 let result = RE_INT_IDENTITY.replace_all(&result, "SERIAL").to_string();
765 RE_SMALLINT_IDENTITY
766 .replace_all(&result, "SMALLSERIAL")
767 .to_string()
768 }
769
770 fn strip_identity(&self, stmt: &str) -> String {
772 use once_cell::sync::Lazy;
773 use regex::Regex;
774
775 static RE_IDENTITY: Lazy<Regex> =
776 Lazy::new(|| Regex::new(r"(?i)\s*IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
777
778 RE_IDENTITY.replace_all(stmt, "").to_string()
779 }
780
781 fn convert_string_escapes(&self, stmt: &str) -> String {
783 match (self.from, self.to) {
784 (SqlDialect::MySql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
785 self.mysql_escapes_to_standard(stmt)
787 }
788 _ => stmt.to_string(),
789 }
790 }
791
792 fn mysql_escapes_to_standard(&self, stmt: &str) -> String {
794 let mut result = String::with_capacity(stmt.len());
795 let mut chars = stmt.chars().peekable();
796 let mut in_string = false;
797
798 while let Some(c) = chars.next() {
799 if c == '\'' {
800 in_string = !in_string;
801 result.push(c);
802 } else if c == '\\' && in_string {
803 if let Some(&next) = chars.peek() {
805 match next {
806 '\'' => {
807 chars.next();
809 result.push_str("''");
810 }
811 '\\' => {
812 chars.next();
814 result.push_str("\\\\");
815 }
816 'n' | 'r' | 't' | '0' => {
817 result.push(c);
819 }
820 _ => {
821 result.push(c);
822 }
823 }
824 } else {
825 result.push(c);
826 }
827 } else {
828 result.push(c);
829 }
830 }
831 result
832 }
833
834 fn strip_conditional_comments(&self, stmt: &str) -> String {
836 let mut result = String::with_capacity(stmt.len());
837 let mut chars = stmt.chars().peekable();
838
839 while let Some(c) = chars.next() {
840 if c == '/' && chars.peek() == Some(&'*') {
841 chars.next(); if chars.peek() == Some(&'!') {
843 chars.next(); while chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
847 chars.next();
848 }
849 let mut depth = 1;
851 while depth > 0 {
852 match chars.next() {
853 Some('*') if chars.peek() == Some(&'/') => {
854 chars.next();
855 depth -= 1;
856 }
857 Some('/') if chars.peek() == Some(&'*') => {
858 chars.next();
859 depth += 1;
860 }
861 None => break,
862 _ => {}
863 }
864 }
865 } else {
866 result.push('/');
868 result.push('*');
869 }
870 } else {
871 result.push(c);
872 }
873 }
874 result
875 }
876
877 fn strip_engine_clause(&self, stmt: &str) -> String {
879 use once_cell::sync::Lazy;
880 use regex::Regex;
881
882 if self.to == SqlDialect::MySql {
883 return stmt.to_string();
884 }
885
886 static RE_ENGINE: Lazy<Regex> =
888 Lazy::new(|| Regex::new(r"(?i)\s*ENGINE\s*=\s*\w+").unwrap());
889 RE_ENGINE.replace_all(stmt, "").to_string()
890 }
891
892 fn strip_charset_clauses(&self, stmt: &str) -> String {
894 use once_cell::sync::Lazy;
895 use regex::Regex;
896
897 if self.to == SqlDialect::MySql {
898 return stmt.to_string();
899 }
900
901 static RE_CHARSET: Lazy<Regex> =
902 Lazy::new(|| Regex::new(r"(?i)\s*(DEFAULT\s+)?CHARSET\s*=\s*\w+").unwrap());
903 static RE_COLLATE: Lazy<Regex> =
904 Lazy::new(|| Regex::new(r"(?i)\s*COLLATE\s*=?\s*\w+").unwrap());
905
906 let result = RE_CHARSET.replace_all(stmt, "").to_string();
907 RE_COLLATE.replace_all(&result, "").to_string()
908 }
909
910 fn strip_postgres_casts(&self, stmt: &str) -> String {
912 use once_cell::sync::Lazy;
913 use regex::Regex;
914
915 static RE_CAST: Lazy<Regex> = Lazy::new(|| {
917 Regex::new(r"::[a-zA-Z_][a-zA-Z0-9_]*(?:\s+[a-zA-Z_][a-zA-Z0-9_]*)*").unwrap()
918 });
919
920 RE_CAST.replace_all(stmt, "").to_string()
921 }
922
923 fn convert_nextval(&self, stmt: &str) -> String {
925 use once_cell::sync::Lazy;
926 use regex::Regex;
927
928 static RE_NEXTVAL: Lazy<Regex> =
931 Lazy::new(|| Regex::new(r"(?i)\s*DEFAULT\s+nextval\s*\([^)]+\)").unwrap());
932
933 RE_NEXTVAL.replace_all(stmt, "").to_string()
934 }
935
936 fn convert_default_now(&self, stmt: &str) -> String {
938 use once_cell::sync::Lazy;
939 use regex::Regex;
940
941 static RE_NOW: Lazy<Regex> =
942 Lazy::new(|| Regex::new(r"(?i)\bDEFAULT\s+now\s*\(\s*\)").unwrap());
943
944 RE_NOW
945 .replace_all(stmt, "DEFAULT CURRENT_TIMESTAMP")
946 .to_string()
947 }
948
949 fn strip_schema_prefix(&self, stmt: &str) -> String {
951 use once_cell::sync::Lazy;
952 use regex::Regex;
953
954 static RE_SCHEMA: Lazy<Regex> =
957 Lazy::new(|| Regex::new(r#"(?i)\b(public|pg_catalog|pg_temp)\s*\.\s*"#).unwrap());
958
959 RE_SCHEMA.replace_all(stmt, "").to_string()
960 }
961
962 fn convert_mssql_getdate(&self, stmt: &str) -> String {
964 use once_cell::sync::Lazy;
965 use regex::Regex;
966
967 static RE_GETDATE: Lazy<Regex> =
968 Lazy::new(|| Regex::new(r"(?i)\bGETDATE\s*\(\s*\)").unwrap());
969 static RE_SYSDATETIME: Lazy<Regex> =
970 Lazy::new(|| Regex::new(r"(?i)\bSYSDATETIME\s*\(\s*\)").unwrap());
971 static RE_GETUTCDATE: Lazy<Regex> =
972 Lazy::new(|| Regex::new(r"(?i)\bGETUTCDATE\s*\(\s*\)").unwrap());
973
974 let result = RE_GETDATE
975 .replace_all(stmt, "CURRENT_TIMESTAMP")
976 .to_string();
977 let result = RE_SYSDATETIME
978 .replace_all(&result, "CURRENT_TIMESTAMP")
979 .to_string();
980 RE_GETUTCDATE
981 .replace_all(&result, "CURRENT_TIMESTAMP")
982 .to_string()
983 }
984
985 fn strip_mssql_on_filegroup(&self, stmt: &str) -> String {
987 use once_cell::sync::Lazy;
988 use regex::Regex;
989
990 static RE_ON_FILEGROUP: Lazy<Regex> =
992 Lazy::new(|| Regex::new(r"(?i)\s+ON\s*\[\s*\w+\s*\]").unwrap());
993
994 RE_ON_FILEGROUP.replace_all(stmt, "").to_string()
995 }
996
997 fn strip_mssql_clustered(&self, stmt: &str) -> String {
999 use once_cell::sync::Lazy;
1000 use regex::Regex;
1001
1002 static RE_CLUSTERED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bCLUSTERED\s+").unwrap());
1003 static RE_NONCLUSTERED: Lazy<Regex> =
1004 Lazy::new(|| Regex::new(r"(?i)\bNONCLUSTERED\s+").unwrap());
1005
1006 let result = RE_CLUSTERED.replace_all(stmt, "").to_string();
1007 RE_NONCLUSTERED.replace_all(&result, "").to_string()
1008 }
1009
1010 fn convert_mssql_unicode_strings(&self, stmt: &str) -> String {
1012 use once_cell::sync::Lazy;
1013 use regex::Regex;
1014
1015 static RE_UNICODE_STRING: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bN'").unwrap());
1018
1019 RE_UNICODE_STRING.replace_all(stmt, "'").to_string()
1020 }
1021
1022 fn strip_mssql_schema_prefix(&self, stmt: &str) -> String {
1024 use once_cell::sync::Lazy;
1025 use regex::Regex;
1026
1027 static RE_MSSQL_SCHEMA: Lazy<Regex> =
1030 Lazy::new(|| Regex::new(r"(?i)\[?dbo\]?\s*\.\s*").unwrap());
1031
1032 RE_MSSQL_SCHEMA.replace_all(stmt, "").to_string()
1033 }
1034
1035 fn detect_unsupported_features(
1037 &mut self,
1038 stmt: &str,
1039 table_name: Option<&str>,
1040 ) -> Result<(), ConvertWarning> {
1041 let upper = stmt.to_uppercase();
1042
1043 if self.from == SqlDialect::MySql {
1045 if upper.contains("ENUM(") {
1047 let warning = ConvertWarning::UnsupportedFeature {
1048 feature: format!(
1049 "ENUM type{}",
1050 table_name
1051 .map(|t| format!(" in table {}", t))
1052 .unwrap_or_default()
1053 ),
1054 suggestion: Some(
1055 "Converted to VARCHAR - consider adding CHECK constraint".to_string(),
1056 ),
1057 };
1058 self.warnings.add(warning.clone());
1059 if self.strict {
1060 return Err(warning);
1061 }
1062 }
1063
1064 if upper.contains("SET(") {
1066 let warning = ConvertWarning::UnsupportedFeature {
1067 feature: format!(
1068 "SET type{}",
1069 table_name
1070 .map(|t| format!(" in table {}", t))
1071 .unwrap_or_default()
1072 ),
1073 suggestion: Some(
1074 "Converted to VARCHAR - SET semantics not preserved".to_string(),
1075 ),
1076 };
1077 self.warnings.add(warning.clone());
1078 if self.strict {
1079 return Err(warning);
1080 }
1081 }
1082
1083 if upper.contains("UNSIGNED") {
1085 self.warnings.add(ConvertWarning::UnsupportedFeature {
1086 feature: "UNSIGNED modifier".to_string(),
1087 suggestion: Some(
1088 "Removed - consider adding CHECK constraint for non-negative values"
1089 .to_string(),
1090 ),
1091 });
1092 }
1093 }
1094
1095 if self.from == SqlDialect::Postgres {
1097 if upper.contains("[]") || upper.contains("ARRAY[") {
1099 let warning = ConvertWarning::UnsupportedFeature {
1100 feature: format!(
1101 "Array type{}",
1102 table_name
1103 .map(|t| format!(" in table {}", t))
1104 .unwrap_or_default()
1105 ),
1106 suggestion: Some(
1107 "Array types not supported in target dialect - consider using JSON"
1108 .to_string(),
1109 ),
1110 };
1111 self.warnings.add(warning.clone());
1112 if self.strict {
1113 return Err(warning);
1114 }
1115 }
1116
1117 if upper.contains("INHERITS") {
1119 let warning = ConvertWarning::UnsupportedFeature {
1120 feature: "Table inheritance (INHERITS)".to_string(),
1121 suggestion: Some(
1122 "PostgreSQL table inheritance not supported in target dialect".to_string(),
1123 ),
1124 };
1125 self.warnings.add(warning.clone());
1126 if self.strict {
1127 return Err(warning);
1128 }
1129 }
1130
1131 if upper.contains("PARTITION BY") && self.to == SqlDialect::Sqlite {
1133 let warning = ConvertWarning::UnsupportedFeature {
1134 feature: "Table partitioning".to_string(),
1135 suggestion: Some("Partitioning not supported in SQLite".to_string()),
1136 };
1137 self.warnings.add(warning.clone());
1138 if self.strict {
1139 return Err(warning);
1140 }
1141 }
1142 }
1143
1144 Ok(())
1145 }
1146
1147 pub fn warnings(&self) -> &[ConvertWarning] {
1149 self.warnings.warnings()
1150 }
1151}
1152
1153pub fn run(config: ConvertConfig) -> anyhow::Result<ConvertStats> {
1155 let mut stats = ConvertStats::default();
1156
1157 let from_dialect = if let Some(d) = config.from_dialect {
1159 d
1160 } else {
1161 let result = crate::parser::detect_dialect_from_file(&config.input)?;
1162 if config.progress {
1163 eprintln!(
1164 "Auto-detected source dialect: {} (confidence: {:?})",
1165 result.dialect, result.confidence
1166 );
1167 }
1168 result.dialect
1169 };
1170
1171 if from_dialect == config.to_dialect {
1173 anyhow::bail!(
1174 "Source and target dialects are the same ({}). No conversion needed.",
1175 from_dialect
1176 );
1177 }
1178
1179 let file_size = std::fs::metadata(&config.input)?.len();
1181
1182 let progress_bar = if config.progress {
1183 let pb = ProgressBar::new(file_size);
1184 pb.set_style(
1185 ProgressStyle::with_template(
1186 "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({percent}%) {msg}",
1187 )
1188 .unwrap()
1189 .progress_chars("█▓▒░ ")
1190 .tick_chars("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"),
1191 );
1192 pb.enable_steady_tick(std::time::Duration::from_millis(100));
1193 pb.set_message("Converting...");
1194 Some(pb)
1195 } else {
1196 None
1197 };
1198
1199 let mut converter = Converter::new(from_dialect, config.to_dialect).with_strict(config.strict);
1201
1202 let file = File::open(&config.input)?;
1204 let compression = Compression::from_path(&config.input);
1205 let reader: Box<dyn Read> = if let Some(ref pb) = progress_bar {
1206 let pb_clone = pb.clone();
1207 let progress_reader = ProgressReader::new(file, move |bytes| {
1208 pb_clone.set_position(bytes);
1209 });
1210 compression.wrap_reader(Box::new(progress_reader))?
1211 } else {
1212 compression.wrap_reader(Box::new(file))?
1213 };
1214 let mut parser = Parser::with_dialect(reader, 64 * 1024, from_dialect);
1215
1216 let mut writer: Box<dyn Write> = if config.dry_run {
1218 Box::new(std::io::sink())
1219 } else {
1220 match &config.output {
1221 Some(path) => {
1222 if let Some(parent) = path.parent() {
1223 std::fs::create_dir_all(parent)?;
1224 }
1225 Box::new(BufWriter::with_capacity(256 * 1024, File::create(path)?))
1226 }
1227 None => Box::new(BufWriter::new(std::io::stdout())),
1228 }
1229 };
1230
1231 if !config.dry_run {
1233 write_header(&mut writer, &config, from_dialect)?;
1234 }
1235
1236 while let Some(stmt) = parser.read_statement()? {
1238 stats.statements_processed += 1;
1239
1240 if converter.has_pending_copy() {
1242 match converter.process_copy_data(&stmt) {
1244 Ok(inserts) => {
1245 for insert in inserts {
1246 if !insert.is_empty() {
1247 stats.statements_converted += 1;
1248 if !config.dry_run {
1249 writer.write_all(&insert)?;
1250 writer.write_all(b"\n")?;
1251 }
1252 }
1253 }
1254 }
1255 Err(warning) => {
1256 stats.warnings.push(warning);
1257 stats.statements_skipped += 1;
1258 }
1259 }
1260 continue;
1261 }
1262
1263 match converter.convert_statement(&stmt) {
1264 Ok(converted) => {
1265 if converted.is_empty() {
1266 stats.statements_skipped += 1;
1267 } else if converted == stmt {
1268 stats.statements_unchanged += 1;
1269 if !config.dry_run {
1270 writer.write_all(&converted)?;
1271 writer.write_all(b"\n")?;
1272 }
1273 } else {
1274 stats.statements_converted += 1;
1275 if !config.dry_run {
1276 writer.write_all(&converted)?;
1277 writer.write_all(b"\n")?;
1278 }
1279 }
1280 }
1281 Err(warning) => {
1282 stats.warnings.push(warning);
1283 stats.statements_skipped += 1;
1284 }
1285 }
1286 }
1287
1288 stats.warnings.extend(converter.warnings().iter().cloned());
1290
1291 if let Some(pb) = progress_bar {
1292 pb.finish_with_message("done");
1293 }
1294
1295 Ok(stats)
1296}
1297
1298fn write_header(
1300 writer: &mut dyn Write,
1301 config: &ConvertConfig,
1302 from: SqlDialect,
1303) -> std::io::Result<()> {
1304 writeln!(writer, "-- Converted by sql-splitter")?;
1305 writeln!(writer, "-- From: {} → To: {}", from, config.to_dialect)?;
1306 writeln!(writer, "-- Source: {}", config.input.display())?;
1307 writeln!(writer)?;
1308
1309 match config.to_dialect {
1311 SqlDialect::Postgres => {
1312 writeln!(writer, "SET client_encoding = 'UTF8';")?;
1313 writeln!(writer, "SET standard_conforming_strings = on;")?;
1314 }
1315 SqlDialect::Sqlite => {
1316 writeln!(writer, "PRAGMA foreign_keys = OFF;")?;
1317 }
1318 SqlDialect::MySql => {
1319 writeln!(writer, "SET NAMES utf8mb4;")?;
1320 writeln!(writer, "SET FOREIGN_KEY_CHECKS = 0;")?;
1321 }
1322 SqlDialect::Mssql => {
1323 writeln!(writer, "SET ANSI_NULLS ON;")?;
1324 writeln!(writer, "SET QUOTED_IDENTIFIER ON;")?;
1325 writeln!(writer, "SET NOCOUNT ON;")?;
1326 }
1327 }
1328 writeln!(writer)?;
1329
1330 Ok(())
1331}