1pub mod copy_to_insert;
12mod types;
13mod warnings;
14
15#[allow(unused_imports)]
16pub use copy_to_insert::{
17 copy_to_inserts, parse_copy_data, parse_copy_header, CopyHeader, CopyValue,
18};
19
20use crate::parser::{Parser, SqlDialect, StatementType};
21use crate::progress::ProgressReader;
22use crate::splitter::Compression;
23use indicatif::{ProgressBar, ProgressStyle};
24use std::fs::File;
25use std::io::{BufWriter, Read, Write};
26use std::path::PathBuf;
27
28pub use types::TypeMapper;
29pub use warnings::{ConvertWarning, WarningCollector};
30
31#[derive(Debug)]
33pub struct ConvertConfig {
34 pub input: PathBuf,
36 pub output: Option<PathBuf>,
38 pub from_dialect: Option<SqlDialect>,
40 pub to_dialect: SqlDialect,
42 pub dry_run: bool,
44 pub progress: bool,
46 pub strict: bool,
48}
49
50impl Default for ConvertConfig {
51 fn default() -> Self {
52 Self {
53 input: PathBuf::new(),
54 output: None,
55 from_dialect: None,
56 to_dialect: SqlDialect::Postgres,
57 dry_run: false,
58 progress: false,
59 strict: false,
60 }
61 }
62}
63
64#[derive(Debug, Default, serde::Serialize)]
66pub struct ConvertStats {
67 pub statements_processed: u64,
69 pub statements_converted: u64,
71 pub statements_unchanged: u64,
73 pub statements_skipped: u64,
75 pub warnings: Vec<ConvertWarning>,
77}
78
79pub struct Converter {
81 from: SqlDialect,
82 to: SqlDialect,
83 warnings: WarningCollector,
84 strict: bool,
85 pending_copy_header: Option<CopyHeader>,
87}
88
89impl Converter {
90 pub fn new(from: SqlDialect, to: SqlDialect) -> Self {
91 Self {
92 from,
93 to,
94 warnings: WarningCollector::new(),
95 strict: false,
96 pending_copy_header: None,
97 }
98 }
99
100 pub fn with_strict(mut self, strict: bool) -> Self {
101 self.strict = strict;
102 self
103 }
104
105 pub fn has_pending_copy(&self) -> bool {
107 self.pending_copy_header.is_some()
108 }
109
110 pub fn process_copy_data(&mut self, data: &[u8]) -> Result<Vec<Vec<u8>>, ConvertWarning> {
112 if let Some(header) = self.pending_copy_header.take() {
113 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
114 let inserts = copy_to_inserts(&header, data, self.to);
116 return Ok(inserts);
117 }
118 }
119 Ok(vec![data.to_vec()])
121 }
122
123 pub fn convert_statement(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
125 let (stmt_type, table_name) =
126 Parser::<&[u8]>::parse_statement_with_dialect(stmt, self.from);
127
128 let table = if table_name.is_empty() {
129 None
130 } else {
131 Some(table_name.as_str())
132 };
133
134 match stmt_type {
135 StatementType::CreateTable => self.convert_create_table(stmt, table),
136 StatementType::Insert => self.convert_insert(stmt, table),
137 StatementType::CreateIndex => self.convert_create_index(stmt),
138 StatementType::AlterTable => self.convert_alter_table(stmt),
139 StatementType::DropTable => self.convert_drop_table(stmt),
140 StatementType::Copy => self.convert_copy(stmt, table),
141 StatementType::Unknown => self.convert_other(stmt),
142 }
143 }
144
145 fn convert_create_table(
147 &mut self,
148 stmt: &[u8],
149 table_name: Option<&str>,
150 ) -> Result<Vec<u8>, ConvertWarning> {
151 let stmt_str = String::from_utf8_lossy(stmt);
152 let mut result = stmt_str.to_string();
153
154 self.detect_unsupported_features(&result, table_name)?;
156
157 if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
160 result = self.strip_mssql_schema_prefix(&result);
161 result = self.convert_mssql_getdate(&result);
162 result = self.strip_mssql_on_filegroup(&result);
163 result = self.strip_mssql_clustered(&result);
164 result = self.convert_mssql_unicode_strings(&result);
165 }
166
167 result = self.convert_identifiers(&result);
169
170 result = self.convert_data_types(&result);
172
173 result = self.convert_auto_increment(&result, table_name);
175
176 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
178 result = self.strip_postgres_casts(&result);
179 result = self.convert_nextval(&result);
180 result = self.convert_default_now(&result);
181 result = self.strip_schema_prefix(&result);
182 }
183
184 result = self.convert_string_escapes(&result);
186
187 result = self.strip_conditional_comments(&result);
189
190 result = self.strip_engine_clause(&result);
192
193 result = self.strip_charset_clauses(&result);
195
196 Ok(result.into_bytes())
197 }
198
199 fn convert_insert(
201 &mut self,
202 stmt: &[u8],
203 _table_name: Option<&str>,
204 ) -> Result<Vec<u8>, ConvertWarning> {
205 let stmt_str = String::from_utf8_lossy(stmt);
206 let mut result = stmt_str.to_string();
207
208 if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
210 result = self.strip_mssql_schema_prefix(&result);
211 result = self.convert_mssql_unicode_strings(&result);
212 }
213
214 result = self.convert_identifiers(&result);
216
217 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
219 result = self.strip_postgres_casts(&result);
220 result = self.strip_schema_prefix(&result);
221 }
222
223 result = self.convert_string_escapes(&result);
225
226 Ok(result.into_bytes())
227 }
228
229 fn convert_create_index(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
231 let stmt_str = String::from_utf8_lossy(stmt);
232 let mut result = stmt_str.to_string();
233
234 if self.from == SqlDialect::Mssql && self.to != SqlDialect::Mssql {
236 result = self.strip_mssql_schema_prefix(&result);
237 result = self.strip_mssql_clustered(&result);
238 }
239
240 result = self.convert_identifiers(&result);
242
243 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
245 result = self.strip_postgres_casts(&result);
246 result = self.strip_schema_prefix(&result);
247 }
248
249 if result.contains("FULLTEXT") || result.contains("fulltext") {
251 self.warnings.add(ConvertWarning::UnsupportedFeature {
252 feature: "FULLTEXT INDEX".to_string(),
253 suggestion: Some("Use PostgreSQL GIN index or skip".to_string()),
254 });
255 if self.strict {
256 return Err(ConvertWarning::UnsupportedFeature {
257 feature: "FULLTEXT INDEX".to_string(),
258 suggestion: None,
259 });
260 }
261 }
262
263 Ok(result.into_bytes())
264 }
265
266 fn convert_alter_table(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
268 let stmt_str = String::from_utf8_lossy(stmt);
269 let mut result = stmt_str.to_string();
270
271 result = self.convert_identifiers(&result);
272 result = self.convert_data_types(&result);
273
274 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
276 result = self.strip_postgres_casts(&result);
277 result = self.convert_nextval(&result);
278 result = self.convert_default_now(&result);
279 result = self.strip_schema_prefix(&result);
280 }
281
282 Ok(result.into_bytes())
283 }
284
285 fn convert_drop_table(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
287 let stmt_str = String::from_utf8_lossy(stmt);
288 let mut result = stmt_str.to_string();
289
290 result = self.convert_identifiers(&result);
291
292 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
294 result = self.strip_schema_prefix(&result);
295 }
296
297 Ok(result.into_bytes())
298 }
299
300 fn convert_copy(
305 &mut self,
306 stmt: &[u8],
307 _table_name: Option<&str>,
308 ) -> Result<Vec<u8>, ConvertWarning> {
309 let stmt_str = String::from_utf8_lossy(stmt);
310
311 let upper = stmt_str.to_uppercase();
313 if upper.contains("FROM STDIN") {
314 if let Some(header) = parse_copy_header(&stmt_str) {
316 if self.from == SqlDialect::Postgres && self.to != SqlDialect::Postgres {
317 self.pending_copy_header = Some(header);
319 return Ok(Vec::new());
321 }
322 }
323 }
324
325 Ok(stmt.to_vec())
327 }
328
329 fn convert_other(&mut self, stmt: &[u8]) -> Result<Vec<u8>, ConvertWarning> {
331 let stmt_str = String::from_utf8_lossy(stmt);
332 let result = stmt_str.to_string();
333 let trimmed = result.trim();
334
335 if self.from == SqlDialect::MySql
337 && self.to != SqlDialect::MySql
338 && self.is_mysql_session_command(&result)
339 {
340 return Ok(Vec::new()); }
342
343 if self.from == SqlDialect::Postgres
345 && self.to != SqlDialect::Postgres
346 && self.is_postgres_session_command(&result)
347 {
348 return Ok(Vec::new()); }
350 if self.from == SqlDialect::Postgres
351 && self.to != SqlDialect::Postgres
352 && self.is_postgres_only_feature(trimmed)
353 {
354 self.warnings.add(ConvertWarning::SkippedStatement {
355 reason: "PostgreSQL-only feature".to_string(),
356 statement_preview: trimmed.chars().take(60).collect(),
357 });
358 return Ok(Vec::new()); }
360
361 if self.from == SqlDialect::Sqlite
363 && self.to != SqlDialect::Sqlite
364 && self.is_sqlite_pragma(&result)
365 {
366 return Ok(Vec::new()); }
368
369 if self.from == SqlDialect::Mssql
371 && self.to != SqlDialect::Mssql
372 && self.is_mssql_session_command(&result)
373 {
374 return Ok(Vec::new()); }
376
377 if result.contains("/*!") {
379 let stripped = self.strip_conditional_comments(&result);
380 return Ok(stripped.into_bytes());
381 }
382
383 Ok(stmt.to_vec())
384 }
385
386 fn is_mysql_session_command(&self, stmt: &str) -> bool {
388 let upper = stmt.to_uppercase();
389 upper.contains("SET NAMES")
390 || upper.contains("SET CHARACTER")
391 || upper.contains("SET SQL_MODE")
392 || upper.contains("SET TIME_ZONE")
393 || upper.contains("SET FOREIGN_KEY_CHECKS")
394 || upper.contains("LOCK TABLES")
395 || upper.contains("UNLOCK TABLES")
396 }
397
398 fn is_postgres_session_command(&self, stmt: &str) -> bool {
400 let upper = stmt.to_uppercase();
401 upper.contains("SET CLIENT_ENCODING")
403 || upper.contains("SET STANDARD_CONFORMING_STRINGS")
404 || upper.contains("SET CHECK_FUNCTION_BODIES")
405 || upper.contains("SET SEARCH_PATH")
406 || upper.contains("SET DEFAULT_TABLESPACE")
407 || upper.contains("SET LOCK_TIMEOUT")
408 || upper.contains("SET IDLE_IN_TRANSACTION_SESSION_TIMEOUT")
409 || upper.contains("SET ROW_SECURITY")
410 || upper.contains("SET STATEMENT_TIMEOUT")
411 || upper.contains("SET XMLOPTION")
412 || upper.contains("SET CLIENT_MIN_MESSAGES")
413 || upper.contains("SET DEFAULT_TABLE_ACCESS_METHOD")
414 || upper.contains("SELECT PG_CATALOG")
415 || upper.contains("OWNER TO")
417 || upper.contains("GRANT ")
418 || upper.contains("REVOKE ")
419 }
420
421 fn is_postgres_only_feature(&self, stmt: &str) -> bool {
423 let stripped = self.strip_leading_sql_comments(stmt);
425 let upper = stripped.to_uppercase();
426
427 upper.starts_with("CREATE DOMAIN")
429 || upper.starts_with("CREATE TYPE")
430 || upper.starts_with("CREATE FUNCTION")
431 || upper.starts_with("CREATE PROCEDURE")
432 || upper.starts_with("CREATE AGGREGATE")
433 || upper.starts_with("CREATE OPERATOR")
434 || upper.starts_with("CREATE SEQUENCE")
435 || upper.starts_with("CREATE EXTENSION")
436 || upper.starts_with("CREATE SCHEMA")
437 || upper.starts_with("CREATE TRIGGER")
438 || upper.starts_with("ALTER DOMAIN")
439 || upper.starts_with("ALTER TYPE")
440 || upper.starts_with("ALTER FUNCTION")
441 || upper.starts_with("ALTER SEQUENCE")
442 || upper.starts_with("ALTER SCHEMA")
443 || upper.starts_with("COMMENT ON")
444 }
445
446 fn strip_leading_sql_comments(&self, stmt: &str) -> String {
448 let mut result = stmt.trim();
449 loop {
450 if result.starts_with("--") {
452 if let Some(pos) = result.find('\n') {
453 result = result[pos + 1..].trim();
454 continue;
455 } else {
456 return String::new();
457 }
458 }
459 if result.starts_with("/*") {
461 if let Some(pos) = result.find("*/") {
462 result = result[pos + 2..].trim();
463 continue;
464 } else {
465 return String::new();
466 }
467 }
468 break;
469 }
470 result.to_string()
471 }
472
473 fn is_sqlite_pragma(&self, stmt: &str) -> bool {
475 let upper = stmt.to_uppercase();
476 upper.contains("PRAGMA")
477 }
478
479 fn is_mssql_session_command(&self, stmt: &str) -> bool {
481 let upper = stmt.to_uppercase();
482 upper.contains("SET ANSI_NULLS")
483 || upper.contains("SET QUOTED_IDENTIFIER")
484 || upper.contains("SET NOCOUNT")
485 || upper.contains("SET XACT_ABORT")
486 || upper.contains("SET ARITHABORT")
487 || upper.contains("SET ANSI_WARNINGS")
488 || upper.contains("SET ANSI_PADDING")
489 || upper.contains("SET CONCAT_NULL_YIELDS_NULL")
490 || upper.contains("SET NUMERIC_ROUNDABORT")
491 || upper.contains("SET IDENTITY_INSERT")
492 }
493
494 fn convert_identifiers(&self, stmt: &str) -> String {
496 match (self.from, self.to) {
497 (SqlDialect::MySql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
498 self.backticks_to_double_quotes(stmt)
500 }
501 (SqlDialect::MySql, SqlDialect::Mssql) => {
502 self.backticks_to_square_brackets(stmt)
504 }
505 (SqlDialect::Postgres | SqlDialect::Sqlite, SqlDialect::MySql) => {
506 self.double_quotes_to_backticks(stmt)
508 }
509 (SqlDialect::Postgres | SqlDialect::Sqlite, SqlDialect::Mssql) => {
510 self.double_quotes_to_square_brackets(stmt)
512 }
513 (SqlDialect::Mssql, SqlDialect::MySql) => {
514 self.square_brackets_to_backticks(stmt)
516 }
517 (SqlDialect::Mssql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
518 self.square_brackets_to_double_quotes(stmt)
520 }
521 _ => stmt.to_string(),
522 }
523 }
524
525 pub fn backticks_to_double_quotes(&self, stmt: &str) -> String {
527 let mut result = String::with_capacity(stmt.len());
528 let mut in_string = false;
529 let mut in_backtick = false;
530
531 for c in stmt.chars() {
532 if c == '\'' && !in_backtick {
533 in_string = !in_string;
534 result.push(c);
535 } else if c == '`' && !in_string {
536 in_backtick = !in_backtick;
537 result.push('"');
538 } else {
539 result.push(c);
540 }
541 }
542 result
543 }
544
545 pub fn double_quotes_to_backticks(&self, stmt: &str) -> String {
547 let mut result = String::with_capacity(stmt.len());
548 let mut in_string = false;
549 let mut in_dquote = false;
550 let chars = stmt.chars();
551
552 for c in chars {
553 if c == '\'' && !in_dquote {
554 in_string = !in_string;
555 result.push(c);
556 } else if c == '"' && !in_string {
557 in_dquote = !in_dquote;
558 result.push('`');
559 } else {
560 result.push(c);
561 }
562 }
563 result
564 }
565
566 pub fn backticks_to_square_brackets(&self, stmt: &str) -> String {
568 let mut result = String::with_capacity(stmt.len());
569 let mut in_string = false;
570 let mut in_backtick = false;
571
572 for c in stmt.chars() {
573 if c == '\'' && !in_backtick {
574 in_string = !in_string;
575 result.push(c);
576 } else if c == '`' && !in_string {
577 if !in_backtick {
578 result.push('[');
579 } else {
580 result.push(']');
581 }
582 in_backtick = !in_backtick;
583 } else {
584 result.push(c);
585 }
586 }
587 result
588 }
589
590 pub fn double_quotes_to_square_brackets(&self, stmt: &str) -> String {
592 let mut result = String::with_capacity(stmt.len());
593 let mut in_string = false;
594 let mut in_dquote = false;
595
596 for c in stmt.chars() {
597 if c == '\'' && !in_dquote {
598 in_string = !in_string;
599 result.push(c);
600 } else if c == '"' && !in_string {
601 if !in_dquote {
602 result.push('[');
603 } else {
604 result.push(']');
605 }
606 in_dquote = !in_dquote;
607 } else {
608 result.push(c);
609 }
610 }
611 result
612 }
613
614 pub fn square_brackets_to_backticks(&self, stmt: &str) -> String {
616 let mut result = String::with_capacity(stmt.len());
617 let mut in_string = false;
618
619 for c in stmt.chars() {
620 if c == '\'' {
621 in_string = !in_string;
622 result.push(c);
623 } else if !in_string && (c == '[' || c == ']') {
624 result.push('`');
625 } else {
626 result.push(c);
627 }
628 }
629 result
630 }
631
632 pub fn square_brackets_to_double_quotes(&self, stmt: &str) -> String {
634 let mut result = String::with_capacity(stmt.len());
635 let mut in_string = false;
636
637 for c in stmt.chars() {
638 if c == '\'' {
639 in_string = !in_string;
640 result.push(c);
641 } else if !in_string && (c == '[' || c == ']') {
642 result.push('"');
643 } else {
644 result.push(c);
645 }
646 }
647 result
648 }
649
650 fn convert_data_types(&self, stmt: &str) -> String {
652 TypeMapper::convert(stmt, self.from, self.to)
653 }
654
655 fn convert_auto_increment(&self, stmt: &str, _table_name: Option<&str>) -> String {
657 match (self.from, self.to) {
658 (SqlDialect::MySql, SqlDialect::Postgres) => {
659 let result = stmt.replace("BIGINT AUTO_INCREMENT", "BIGSERIAL");
662 let result = result.replace("bigint AUTO_INCREMENT", "BIGSERIAL");
663 let result = result.replace("INT AUTO_INCREMENT", "SERIAL");
664 let result = result.replace("int AUTO_INCREMENT", "SERIAL");
665 result.replace("AUTO_INCREMENT", "") }
667 (SqlDialect::MySql, SqlDialect::Sqlite) => {
668 let result = stmt.replace("INT AUTO_INCREMENT", "INTEGER");
671 let result = result.replace("int AUTO_INCREMENT", "INTEGER");
672 result.replace("AUTO_INCREMENT", "")
673 }
674 (SqlDialect::Postgres, SqlDialect::MySql) => {
675 let result = stmt.replace("BIGSERIAL", "BIGINT AUTO_INCREMENT");
678 let result = result.replace("bigserial", "BIGINT AUTO_INCREMENT");
679 let result = result.replace("SMALLSERIAL", "SMALLINT AUTO_INCREMENT");
680 let result = result.replace("smallserial", "SMALLINT AUTO_INCREMENT");
681 let result = result.replace("SERIAL", "INT AUTO_INCREMENT");
682 result.replace("serial", "INT AUTO_INCREMENT")
683 }
684 (SqlDialect::Postgres, SqlDialect::Sqlite) => {
685 let result = stmt.replace("BIGSERIAL", "INTEGER");
687 let result = result.replace("bigserial", "INTEGER");
688 let result = result.replace("SMALLSERIAL", "INTEGER");
689 let result = result.replace("smallserial", "INTEGER");
690 let result = result.replace("SERIAL", "INTEGER");
691 result.replace("serial", "INTEGER")
692 }
693 (SqlDialect::Sqlite, SqlDialect::MySql) => {
694 stmt.to_string()
697 }
698 (SqlDialect::Sqlite, SqlDialect::Postgres) => {
699 stmt.to_string()
702 }
703 (SqlDialect::MySql, SqlDialect::Mssql) => {
705 let result = stmt.replace("BIGINT AUTO_INCREMENT", "BIGINT IDENTITY(1,1)");
707 let result = result.replace("bigint AUTO_INCREMENT", "BIGINT IDENTITY(1,1)");
708 let result = result.replace("INT AUTO_INCREMENT", "INT IDENTITY(1,1)");
709 let result = result.replace("int AUTO_INCREMENT", "INT IDENTITY(1,1)");
710 result.replace("AUTO_INCREMENT", "IDENTITY(1,1)")
711 }
712 (SqlDialect::Mssql, SqlDialect::MySql) => {
713 self.convert_identity_to_auto_increment(stmt)
715 }
716 (SqlDialect::Postgres, SqlDialect::Mssql) => {
717 stmt.to_string()
719 }
720 (SqlDialect::Mssql, SqlDialect::Postgres) => {
721 self.convert_identity_to_serial(stmt)
723 }
724 (SqlDialect::Sqlite, SqlDialect::Mssql) => {
725 stmt.to_string()
727 }
728 (SqlDialect::Mssql, SqlDialect::Sqlite) => {
729 self.strip_identity(stmt)
731 }
732 _ => stmt.to_string(),
733 }
734 }
735
736 fn convert_identity_to_auto_increment(&self, stmt: &str) -> String {
738 use once_cell::sync::Lazy;
739 use regex::Regex;
740
741 static RE_IDENTITY: Lazy<Regex> =
742 Lazy::new(|| Regex::new(r"(?i)\bIDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
743
744 RE_IDENTITY.replace_all(stmt, "AUTO_INCREMENT").to_string()
745 }
746
747 fn convert_identity_to_serial(&self, stmt: &str) -> String {
749 use once_cell::sync::Lazy;
750 use regex::Regex;
751
752 static RE_BIGINT_IDENTITY: Lazy<Regex> =
754 Lazy::new(|| Regex::new(r"(?i)\bBIGINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
755 static RE_INT_IDENTITY: Lazy<Regex> =
756 Lazy::new(|| Regex::new(r"(?i)\bINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
757 static RE_SMALLINT_IDENTITY: Lazy<Regex> = Lazy::new(|| {
758 Regex::new(r"(?i)\bSMALLINT\s+IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap()
759 });
760
761 let result = RE_BIGINT_IDENTITY
762 .replace_all(stmt, "BIGSERIAL")
763 .to_string();
764 let result = RE_INT_IDENTITY.replace_all(&result, "SERIAL").to_string();
765 RE_SMALLINT_IDENTITY
766 .replace_all(&result, "SMALLSERIAL")
767 .to_string()
768 }
769
770 fn strip_identity(&self, stmt: &str) -> String {
772 use once_cell::sync::Lazy;
773 use regex::Regex;
774
775 static RE_IDENTITY: Lazy<Regex> =
776 Lazy::new(|| Regex::new(r"(?i)\s*IDENTITY\s*\(\s*\d+\s*,\s*\d+\s*\)").unwrap());
777
778 RE_IDENTITY.replace_all(stmt, "").to_string()
779 }
780
781 fn convert_string_escapes(&self, stmt: &str) -> String {
783 match (self.from, self.to) {
784 (SqlDialect::MySql, SqlDialect::Postgres | SqlDialect::Sqlite) => {
785 self.mysql_escapes_to_standard(stmt)
787 }
788 _ => stmt.to_string(),
789 }
790 }
791
792 fn mysql_escapes_to_standard(&self, stmt: &str) -> String {
794 let mut result = String::with_capacity(stmt.len());
795 let mut chars = stmt.chars().peekable();
796 let mut in_string = false;
797
798 while let Some(c) = chars.next() {
799 if c == '\'' {
800 in_string = !in_string;
801 result.push(c);
802 } else if c == '\\' && in_string {
803 if let Some(&next) = chars.peek() {
805 match next {
806 '\'' => {
807 chars.next();
809 result.push_str("''");
810 }
811 '\\' => {
812 chars.next();
814 result.push_str("\\\\");
815 }
816 'n' | 'r' | 't' | '0' => {
817 result.push(c);
819 }
820 _ => {
821 result.push(c);
822 }
823 }
824 } else {
825 result.push(c);
826 }
827 } else {
828 result.push(c);
829 }
830 }
831 result
832 }
833
834 fn strip_conditional_comments(&self, stmt: &str) -> String {
836 let mut result = String::with_capacity(stmt.len());
837 let mut chars = stmt.chars().peekable();
838
839 while let Some(c) = chars.next() {
840 if c == '/' && chars.peek() == Some(&'*') {
841 chars.next(); if chars.peek() == Some(&'!') {
843 chars.next(); while chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
847 chars.next();
848 }
849 let mut depth = 1;
851 while depth > 0 {
852 match chars.next() {
853 Some('*') if chars.peek() == Some(&'/') => {
854 chars.next();
855 depth -= 1;
856 }
857 Some('/') if chars.peek() == Some(&'*') => {
858 chars.next();
859 depth += 1;
860 }
861 None => break,
862 _ => {}
863 }
864 }
865 } else {
866 result.push('/');
868 result.push('*');
869 }
870 } else {
871 result.push(c);
872 }
873 }
874 result
875 }
876
877 fn strip_engine_clause(&self, stmt: &str) -> String {
879 if self.to == SqlDialect::MySql {
880 return stmt.to_string();
881 }
882
883 let re = regex::Regex::new(r"(?i)\s*ENGINE\s*=\s*\w+").unwrap();
885 re.replace_all(stmt, "").to_string()
886 }
887
888 fn strip_charset_clauses(&self, stmt: &str) -> String {
890 if self.to == SqlDialect::MySql {
891 return stmt.to_string();
892 }
893
894 let result = stmt.to_string();
895 let re1 = regex::Regex::new(r"(?i)\s*(DEFAULT\s+)?CHARSET\s*=\s*\w+").unwrap();
896 let result = re1.replace_all(&result, "").to_string();
897
898 let re2 = regex::Regex::new(r"(?i)\s*COLLATE\s*=?\s*\w+").unwrap();
899 re2.replace_all(&result, "").to_string()
900 }
901
902 fn strip_postgres_casts(&self, stmt: &str) -> String {
904 use once_cell::sync::Lazy;
905 use regex::Regex;
906
907 static RE_CAST: Lazy<Regex> = Lazy::new(|| {
909 Regex::new(r"::[a-zA-Z_][a-zA-Z0-9_]*(?:\s+[a-zA-Z_][a-zA-Z0-9_]*)*").unwrap()
910 });
911
912 RE_CAST.replace_all(stmt, "").to_string()
913 }
914
915 fn convert_nextval(&self, stmt: &str) -> String {
917 use once_cell::sync::Lazy;
918 use regex::Regex;
919
920 static RE_NEXTVAL: Lazy<Regex> =
923 Lazy::new(|| Regex::new(r"(?i)\s*DEFAULT\s+nextval\s*\([^)]+\)").unwrap());
924
925 RE_NEXTVAL.replace_all(stmt, "").to_string()
926 }
927
928 fn convert_default_now(&self, stmt: &str) -> String {
930 use once_cell::sync::Lazy;
931 use regex::Regex;
932
933 static RE_NOW: Lazy<Regex> =
934 Lazy::new(|| Regex::new(r"(?i)\bDEFAULT\s+now\s*\(\s*\)").unwrap());
935
936 RE_NOW
937 .replace_all(stmt, "DEFAULT CURRENT_TIMESTAMP")
938 .to_string()
939 }
940
941 fn strip_schema_prefix(&self, stmt: &str) -> String {
943 use once_cell::sync::Lazy;
944 use regex::Regex;
945
946 static RE_SCHEMA: Lazy<Regex> =
949 Lazy::new(|| Regex::new(r#"(?i)\b(public|pg_catalog|pg_temp)\s*\.\s*"#).unwrap());
950
951 RE_SCHEMA.replace_all(stmt, "").to_string()
952 }
953
954 fn convert_mssql_getdate(&self, stmt: &str) -> String {
956 use once_cell::sync::Lazy;
957 use regex::Regex;
958
959 static RE_GETDATE: Lazy<Regex> =
960 Lazy::new(|| Regex::new(r"(?i)\bGETDATE\s*\(\s*\)").unwrap());
961 static RE_SYSDATETIME: Lazy<Regex> =
962 Lazy::new(|| Regex::new(r"(?i)\bSYSDATETIME\s*\(\s*\)").unwrap());
963 static RE_GETUTCDATE: Lazy<Regex> =
964 Lazy::new(|| Regex::new(r"(?i)\bGETUTCDATE\s*\(\s*\)").unwrap());
965
966 let result = RE_GETDATE
967 .replace_all(stmt, "CURRENT_TIMESTAMP")
968 .to_string();
969 let result = RE_SYSDATETIME
970 .replace_all(&result, "CURRENT_TIMESTAMP")
971 .to_string();
972 RE_GETUTCDATE
973 .replace_all(&result, "CURRENT_TIMESTAMP")
974 .to_string()
975 }
976
977 fn strip_mssql_on_filegroup(&self, stmt: &str) -> String {
979 use once_cell::sync::Lazy;
980 use regex::Regex;
981
982 static RE_ON_FILEGROUP: Lazy<Regex> =
984 Lazy::new(|| Regex::new(r"(?i)\s+ON\s*\[\s*\w+\s*\]").unwrap());
985
986 RE_ON_FILEGROUP.replace_all(stmt, "").to_string()
987 }
988
989 fn strip_mssql_clustered(&self, stmt: &str) -> String {
991 use once_cell::sync::Lazy;
992 use regex::Regex;
993
994 static RE_CLUSTERED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bCLUSTERED\s+").unwrap());
995 static RE_NONCLUSTERED: Lazy<Regex> =
996 Lazy::new(|| Regex::new(r"(?i)\bNONCLUSTERED\s+").unwrap());
997
998 let result = RE_CLUSTERED.replace_all(stmt, "").to_string();
999 RE_NONCLUSTERED.replace_all(&result, "").to_string()
1000 }
1001
1002 fn convert_mssql_unicode_strings(&self, stmt: &str) -> String {
1004 use once_cell::sync::Lazy;
1005 use regex::Regex;
1006
1007 static RE_UNICODE_STRING: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bN'").unwrap());
1010
1011 RE_UNICODE_STRING.replace_all(stmt, "'").to_string()
1012 }
1013
1014 fn strip_mssql_schema_prefix(&self, stmt: &str) -> String {
1016 use once_cell::sync::Lazy;
1017 use regex::Regex;
1018
1019 static RE_MSSQL_SCHEMA: Lazy<Regex> =
1022 Lazy::new(|| Regex::new(r"(?i)\[?dbo\]?\s*\.\s*").unwrap());
1023
1024 RE_MSSQL_SCHEMA.replace_all(stmt, "").to_string()
1025 }
1026
1027 fn detect_unsupported_features(
1029 &mut self,
1030 stmt: &str,
1031 table_name: Option<&str>,
1032 ) -> Result<(), ConvertWarning> {
1033 let upper = stmt.to_uppercase();
1034
1035 if self.from == SqlDialect::MySql {
1037 if upper.contains("ENUM(") {
1039 let warning = ConvertWarning::UnsupportedFeature {
1040 feature: format!(
1041 "ENUM type{}",
1042 table_name
1043 .map(|t| format!(" in table {}", t))
1044 .unwrap_or_default()
1045 ),
1046 suggestion: Some(
1047 "Converted to VARCHAR - consider adding CHECK constraint".to_string(),
1048 ),
1049 };
1050 self.warnings.add(warning.clone());
1051 if self.strict {
1052 return Err(warning);
1053 }
1054 }
1055
1056 if upper.contains("SET(") {
1058 let warning = ConvertWarning::UnsupportedFeature {
1059 feature: format!(
1060 "SET type{}",
1061 table_name
1062 .map(|t| format!(" in table {}", t))
1063 .unwrap_or_default()
1064 ),
1065 suggestion: Some(
1066 "Converted to VARCHAR - SET semantics not preserved".to_string(),
1067 ),
1068 };
1069 self.warnings.add(warning.clone());
1070 if self.strict {
1071 return Err(warning);
1072 }
1073 }
1074
1075 if upper.contains("UNSIGNED") {
1077 self.warnings.add(ConvertWarning::UnsupportedFeature {
1078 feature: "UNSIGNED modifier".to_string(),
1079 suggestion: Some(
1080 "Removed - consider adding CHECK constraint for non-negative values"
1081 .to_string(),
1082 ),
1083 });
1084 }
1085 }
1086
1087 if self.from == SqlDialect::Postgres {
1089 if upper.contains("[]") || upper.contains("ARRAY[") {
1091 let warning = ConvertWarning::UnsupportedFeature {
1092 feature: format!(
1093 "Array type{}",
1094 table_name
1095 .map(|t| format!(" in table {}", t))
1096 .unwrap_or_default()
1097 ),
1098 suggestion: Some(
1099 "Array types not supported in target dialect - consider using JSON"
1100 .to_string(),
1101 ),
1102 };
1103 self.warnings.add(warning.clone());
1104 if self.strict {
1105 return Err(warning);
1106 }
1107 }
1108
1109 if upper.contains("INHERITS") {
1111 let warning = ConvertWarning::UnsupportedFeature {
1112 feature: "Table inheritance (INHERITS)".to_string(),
1113 suggestion: Some(
1114 "PostgreSQL table inheritance not supported in target dialect".to_string(),
1115 ),
1116 };
1117 self.warnings.add(warning.clone());
1118 if self.strict {
1119 return Err(warning);
1120 }
1121 }
1122
1123 if upper.contains("PARTITION BY") && self.to == SqlDialect::Sqlite {
1125 let warning = ConvertWarning::UnsupportedFeature {
1126 feature: "Table partitioning".to_string(),
1127 suggestion: Some("Partitioning not supported in SQLite".to_string()),
1128 };
1129 self.warnings.add(warning.clone());
1130 if self.strict {
1131 return Err(warning);
1132 }
1133 }
1134 }
1135
1136 Ok(())
1137 }
1138
1139 pub fn warnings(&self) -> &[ConvertWarning] {
1141 self.warnings.warnings()
1142 }
1143}
1144
1145pub fn run(config: ConvertConfig) -> anyhow::Result<ConvertStats> {
1147 let mut stats = ConvertStats::default();
1148
1149 let from_dialect = if let Some(d) = config.from_dialect {
1151 d
1152 } else {
1153 let result = crate::parser::detect_dialect_from_file(&config.input)?;
1154 if config.progress {
1155 eprintln!(
1156 "Auto-detected source dialect: {} (confidence: {:?})",
1157 result.dialect, result.confidence
1158 );
1159 }
1160 result.dialect
1161 };
1162
1163 if from_dialect == config.to_dialect {
1165 anyhow::bail!(
1166 "Source and target dialects are the same ({}). No conversion needed.",
1167 from_dialect
1168 );
1169 }
1170
1171 let file_size = std::fs::metadata(&config.input)?.len();
1173
1174 let progress_bar = if config.progress {
1175 let pb = ProgressBar::new(file_size);
1176 pb.set_style(
1177 ProgressStyle::with_template(
1178 "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({percent}%) {msg}",
1179 )
1180 .unwrap()
1181 .progress_chars("█▓▒░ ")
1182 .tick_chars("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"),
1183 );
1184 pb.enable_steady_tick(std::time::Duration::from_millis(100));
1185 pb.set_message("Converting...");
1186 Some(pb)
1187 } else {
1188 None
1189 };
1190
1191 let mut converter = Converter::new(from_dialect, config.to_dialect).with_strict(config.strict);
1193
1194 let file = File::open(&config.input)?;
1196 let compression = Compression::from_path(&config.input);
1197 let reader: Box<dyn Read> = if let Some(ref pb) = progress_bar {
1198 let pb_clone = pb.clone();
1199 let progress_reader = ProgressReader::new(file, move |bytes| {
1200 pb_clone.set_position(bytes);
1201 });
1202 compression.wrap_reader(Box::new(progress_reader))
1203 } else {
1204 compression.wrap_reader(Box::new(file))
1205 };
1206 let mut parser = Parser::with_dialect(reader, 64 * 1024, from_dialect);
1207
1208 let mut writer: Box<dyn Write> = if config.dry_run {
1210 Box::new(std::io::sink())
1211 } else {
1212 match &config.output {
1213 Some(path) => {
1214 if let Some(parent) = path.parent() {
1215 std::fs::create_dir_all(parent)?;
1216 }
1217 Box::new(BufWriter::with_capacity(256 * 1024, File::create(path)?))
1218 }
1219 None => Box::new(BufWriter::new(std::io::stdout())),
1220 }
1221 };
1222
1223 if !config.dry_run {
1225 write_header(&mut writer, &config, from_dialect)?;
1226 }
1227
1228 while let Some(stmt) = parser.read_statement()? {
1230 stats.statements_processed += 1;
1231
1232 if converter.has_pending_copy() {
1234 match converter.process_copy_data(&stmt) {
1236 Ok(inserts) => {
1237 for insert in inserts {
1238 if !insert.is_empty() {
1239 stats.statements_converted += 1;
1240 if !config.dry_run {
1241 writer.write_all(&insert)?;
1242 writer.write_all(b"\n")?;
1243 }
1244 }
1245 }
1246 }
1247 Err(warning) => {
1248 stats.warnings.push(warning);
1249 stats.statements_skipped += 1;
1250 }
1251 }
1252 continue;
1253 }
1254
1255 match converter.convert_statement(&stmt) {
1256 Ok(converted) => {
1257 if converted.is_empty() {
1258 stats.statements_skipped += 1;
1259 } else if converted == stmt {
1260 stats.statements_unchanged += 1;
1261 if !config.dry_run {
1262 writer.write_all(&converted)?;
1263 writer.write_all(b"\n")?;
1264 }
1265 } else {
1266 stats.statements_converted += 1;
1267 if !config.dry_run {
1268 writer.write_all(&converted)?;
1269 writer.write_all(b"\n")?;
1270 }
1271 }
1272 }
1273 Err(warning) => {
1274 stats.warnings.push(warning);
1275 stats.statements_skipped += 1;
1276 }
1277 }
1278 }
1279
1280 stats.warnings.extend(converter.warnings().iter().cloned());
1282
1283 if let Some(pb) = progress_bar {
1284 pb.finish_with_message("done");
1285 }
1286
1287 Ok(stats)
1288}
1289
1290fn write_header(
1292 writer: &mut dyn Write,
1293 config: &ConvertConfig,
1294 from: SqlDialect,
1295) -> std::io::Result<()> {
1296 writeln!(writer, "-- Converted by sql-splitter")?;
1297 writeln!(writer, "-- From: {} → To: {}", from, config.to_dialect)?;
1298 writeln!(writer, "-- Source: {}", config.input.display())?;
1299 writeln!(writer)?;
1300
1301 match config.to_dialect {
1303 SqlDialect::Postgres => {
1304 writeln!(writer, "SET client_encoding = 'UTF8';")?;
1305 writeln!(writer, "SET standard_conforming_strings = on;")?;
1306 }
1307 SqlDialect::Sqlite => {
1308 writeln!(writer, "PRAGMA foreign_keys = OFF;")?;
1309 }
1310 SqlDialect::MySql => {
1311 writeln!(writer, "SET NAMES utf8mb4;")?;
1312 writeln!(writer, "SET FOREIGN_KEY_CHECKS = 0;")?;
1313 }
1314 SqlDialect::Mssql => {
1315 writeln!(writer, "SET ANSI_NULLS ON;")?;
1316 writeln!(writer, "SET QUOTED_IDENTIFIER ON;")?;
1317 writeln!(writer, "SET NOCOUNT ON;")?;
1318 }
1319 }
1320 writeln!(writer)?;
1321
1322 Ok(())
1323}