1mod analyze;
2mod convert;
3mod diff;
4mod glob_util;
5mod merge;
6mod redact;
7mod sample;
8mod shard;
9mod split;
10mod validate;
11
12use clap::{CommandFactory, Parser, Subcommand, ValueHint};
13use clap_complete::{generate, Shell};
14use std::io;
15use std::path::PathBuf;
16
// Extended help appended after the generated option list (via `after_help`).
// `\x1b[1m` / `\x1b[0m` are ANSI bold-on / reset escapes; clap prints this
// string verbatim.
// NOTE(review): the escapes are embedded unconditionally — when help output
// is piped to a non-TTY they will appear as raw escape bytes; confirm that
// is acceptable or gate on terminal detection.
const AFTER_HELP: &str = "\x1b[1mCommon workflows:\x1b[0m
  Split a dump into per-table files:
    sql-splitter split dump.sql -o tables/

  Create a 10% sample for development:
    sql-splitter sample dump.sql -o dev.sql --percent 10 --preserve-relations

  Convert MySQL to PostgreSQL:
    sql-splitter convert mysql.sql --to postgres -o pg.sql

  Compare two dumps for changes:
    sql-splitter diff old.sql new.sql --format sql -o migration.sql

\x1b[1mMore info:\x1b[0m
  Run 'sql-splitter <command> --help' for command-specific options.
  Documentation: https://github.com/helgesverre/sql-splitter
  Enable completions: sql-splitter completions <shell>";
34
// Top-level argument parser.
// `about`/`after_help` are supplied through explicit `#[command(...)]`
// attributes (which take precedence over doc comments in clap derive).
// `arg_required_else_help` makes a bare `sql-splitter` invocation print
// help instead of erroring; `max_term_width` caps help wrapping at 100
// columns to match the project's line-length convention.
#[derive(Parser)]
#[command(name = "sql-splitter")]
#[command(author = "Helge Sverre <helge.sverre@gmail.com>")]
#[command(version)]
#[command(about = "High-performance CLI for splitting, merging, converting, and analyzing SQL dump files")]
#[command(after_help = AFTER_HELP)]
#[command(arg_required_else_help = true)]
#[command(max_term_width = 100)]
pub struct Cli {
    // The chosen subcommand; dispatched by `run` below.
    #[command(subcommand)]
    pub command: Commands,
}
47
// Shared `help_heading` labels used to group arguments in `--help` output.
// Centralizing them here keeps the section names consistent across all
// subcommands (see the `#[arg(help_heading = ...)]` attributes below).
const INPUT_OUTPUT: &str = "Input/Output";
const FILTERING: &str = "Filtering";
const MODE: &str = "Mode";
const BEHAVIOR: &str = "Behavior";
const LIMITS: &str = "Limits";
const OUTPUT_FORMAT: &str = "Output";
55
// All subcommands of the tool.
// IMPORTANT: in clap derive, `///` doc comments on variants/fields become
// user-visible help text — the reviewer notes below therefore use plain
// `//` comments so the generated `--help` output is unchanged.
#[derive(Subcommand)]
pub enum Commands {
    // Split one dump (or a glob of dumps, per the examples) into per-table
    // output files.
    #[command(visible_alias = "sp")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter split dump.sql -o tables/
  sql-splitter split dump.sql.gz -o tables/ --tables users,orders
  sql-splitter split dump.sql -o schema/ --schema-only
  sql-splitter split \"backups/*.sql\" -o out/ --fail-fast")]
    Split {
        // Input dump path (examples show plain and `.gz` inputs, plus globs).
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, default_value = "output", value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
        output: PathBuf,

        // SQL dialect; `None` presumably means auto-detect — TODO confirm in split::run.
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        // Comma-separated table list (see the `--tables users,orders` example).
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        // `--schema-only` and `--data-only` are mutually exclusive.
        #[arg(long, conflicts_with = "data_only", help_heading = FILTERING)]
        schema_only: bool,

        #[arg(long, conflicts_with = "schema_only", help_heading = FILTERING)]
        data_only: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        verbose: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    // Report statistics about a dump without writing output files.
    #[command(visible_alias = "an")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter analyze dump.sql
  sql-splitter analyze dump.sql.gz --progress
  sql-splitter analyze \"dumps/*.sql\" --json")]
    Analyze {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    // Recombine previously split per-table files into a single dump.
    #[command(visible_alias = "mg")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter merge tables/ -o restored.sql
  sql-splitter merge tables/ -o restored.sql --transaction
  sql-splitter merge tables/ -o partial.sql --tables users,orders
  sql-splitter merge tables/ -o clean.sql --exclude logs,cache")]
    Merge {
        #[arg(value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
        input_dir: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        // NOTE(review): `Option<String>` with a `default_value` is always
        // `Some("mysql")` after parsing — the `Option` wrapper is dead here,
        // unlike the other subcommands where a missing dialect stays `None`.
        // Confirm whether merge is meant to support auto-detection too.
        #[arg(short, long, default_value = "mysql", help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        // Wrap output in a transaction (see `--transaction` example).
        #[arg(long, help_heading = BEHAVIOR)]
        transaction: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_header: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    // Produce a smaller sample of a dump (by percent or absolute rows).
    #[command(visible_alias = "sa")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter sample dump.sql -o dev.sql --percent 10
  sql-splitter sample dump.sql -o dev.sql --rows 1000 --preserve-relations
  sql-splitter sample dump.sql -o dev.sql --percent 5 --seed 42
  sql-splitter sample dump.sql -o dev.sql --tables users,orders --percent 20")]
    Sample {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        // Exactly one sampling mode may be chosen: percent XOR rows.
        #[arg(long, conflicts_with = "rows", help_heading = MODE)]
        percent: Option<u32>,

        #[arg(long, conflicts_with = "percent", help_heading = MODE)]
        rows: Option<usize>,

        // RNG seed for reproducible samples (see `--seed 42` example).
        #[arg(long, help_heading = MODE)]
        seed: Option<u64>,

        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        #[arg(long, help_heading = FILTERING)]
        root_tables: Option<String>,

        // NOTE(review): `Option<String>` + `default_value` is always `Some`;
        // consider plain `String` (same note applies to Shard below).
        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
        include_global: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        preserve_relations: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        strict_fk: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_schema: bool,

        // Row cap; `--no-limit` or an explicit 0 disables it (see `run`).
        #[arg(long, help_heading = LIMITS)]
        max_total_rows: Option<usize>,

        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    // Extract the rows belonging to one tenant (or several) from a
    // multi-tenant dump.
    #[command(visible_alias = "sh")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter shard dump.sql -o tenant.sql --tenant-value 123
  sql-splitter shard dump.sql -o tenant.sql --tenant-column company_id --tenant-value 42
  sql-splitter shard dump.sql -o shards/ --tenant-values \"1,2,3\"")]
    Shard {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        #[arg(long, help_heading = MODE)]
        tenant_column: Option<String>,

        // Single value XOR comma-separated list of values.
        #[arg(long, conflicts_with = "tenant_values", help_heading = MODE)]
        tenant_value: Option<String>,

        #[arg(long, conflicts_with = "tenant_value", help_heading = MODE)]
        tenant_values: Option<String>,

        #[arg(long, help_heading = FILTERING)]
        root_tables: Option<String>,

        // Same always-`Some` pattern as Sample::include_global — see note there.
        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
        include_global: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        strict_fk: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_schema: bool,

        // Row cap; `--no-limit` or an explicit 0 disables it (see `run`).
        #[arg(long, help_heading = LIMITS)]
        max_selected_rows: Option<usize>,

        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    // Translate a dump between SQL dialects.
    #[command(visible_alias = "cv")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter convert mysql.sql --to postgres -o pg.sql
  sql-splitter convert pg_dump.sql --to mysql -o mysql.sql
  sql-splitter convert dump.sql --from mysql --to sqlite -o sqlite.sql
  sql-splitter convert mysql.sql --to postgres | psql mydb")]
    Convert {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        // Optional: the pipe example suggests stdout is used when absent —
        // TODO confirm in convert::run.
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        // Source dialect is optional; target dialect is required (plain String).
        #[arg(long, help_heading = MODE)]
        from: Option<String>,

        #[arg(long, help_heading = MODE)]
        to: String,

        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    // Check a dump for problems (optionally including FK checks).
    #[command(visible_alias = "val")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter validate dump.sql
  sql-splitter validate dump.sql --strict
  sql-splitter validate \"dumps/*.sql\" --json --fail-fast
  sql-splitter validate dump.sql --no-fk-checks")]
    Validate {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_fk_checks: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,

        // Non-optional limit with a 1M default; `--no-limit` or 0 lifts the
        // cap to `usize::MAX` in `run`.
        #[arg(long, default_value = "1000000", help_heading = LIMITS)]
        max_rows_per_table: usize,

        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,
    },

    // Compare two dumps; can emit a SQL migration (`--format sql`).
    #[command(visible_alias = "df")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter diff old.sql new.sql
  sql-splitter diff old.sql new.sql --schema-only
  sql-splitter diff old.sql new.sql --format sql -o migration.sql
  sql-splitter diff old.sql new.sql --verbose --ignore-columns \"*.updated_at\"
  sql-splitter diff old.sql new.sql --primary-key logs:timestamp+message")]
    Diff {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        old_file: PathBuf,

        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        new_file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        // Glob-style column patterns (see `\"*.updated_at\"` example).
        #[arg(long, help_heading = FILTERING)]
        ignore_columns: Option<String>,

        #[arg(long, conflicts_with = "data_only", help_heading = MODE)]
        schema_only: bool,

        #[arg(long, conflicts_with = "schema_only", help_heading = MODE)]
        data_only: bool,

        // Per-table PK override, `table:col+col` syntax per the example.
        #[arg(long, help_heading = MODE)]
        primary_key: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        allow_no_pk: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        ignore_order: bool,

        #[arg(long, default_value = "10000000", help_heading = LIMITS)]
        max_pk_entries: usize,

        // NOTE(review): `Option<String>` + `default_value = "text"` is
        // always `Some` after parsing; a plain `String` would be simpler.
        #[arg(short, long, default_value = "text", help_heading = OUTPUT_FORMAT)]
        format: Option<String>,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        verbose: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,
    },

    // Scrub sensitive values from a dump via config file or inline rules.
    #[command(visible_alias = "rd")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter redact dump.sql -o safe.sql --config redact.yaml
  sql-splitter redact dump.sql -o safe.sql --null \"*.ssn\" --hash \"*.email\"
  sql-splitter redact dump.sql --generate-config -o redact.yaml
  sql-splitter redact dump.sql -o safe.sql --config redact.yaml --seed 42")]
    Redact {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        // Emit a starter config instead of redacting (third example).
        #[arg(long, help_heading = MODE)]
        generate_config: bool,

        // Inline strategy flags: each takes comma-separated `table.column`
        // patterns (see the `--null \"*.ssn\"` example). The heading is a
        // literal because it is used only here.
        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        null: Vec<String>,

        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        hash: Vec<String>,

        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        fake: Vec<String>,

        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        mask: Vec<String>,

        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        constant: Vec<String>,

        #[arg(long, help_heading = MODE)]
        seed: Option<u64>,

        #[arg(long, default_value = "en", help_heading = MODE)]
        locale: String,

        #[arg(short, long, value_delimiter = ',', help_heading = FILTERING)]
        tables: Vec<String>,

        // `-x` because `-e` would collide with nothing here but matches no
        // short used elsewhere in this variant — TODO confirm rationale.
        #[arg(short = 'x', long, value_delimiter = ',', help_heading = FILTERING)]
        exclude: Vec<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        validate: bool,
    },

    // Print a shell-completion script to stdout.
    #[command(after_help = "\x1b[1mInstallation:\x1b[0m
  Bash:
    sql-splitter completions bash > /etc/bash_completion.d/sql-splitter
    # or: sql-splitter completions bash >> ~/.bashrc

  Zsh:
    sql-splitter completions zsh > \"${fpath[1]}/_sql-splitter\"
    # or for oh-my-zsh: sql-splitter completions zsh > ~/.oh-my-zsh/completions/_sql-splitter

  Fish:
    sql-splitter completions fish > ~/.config/fish/completions/sql-splitter.fish

  PowerShell:
    sql-splitter completions powershell >> $PROFILE")]
    Completions {
        #[arg(value_enum)]
        shell: Shell,
    },
}
615
616pub fn run(cli: Cli) -> anyhow::Result<()> {
617 match cli.command {
618 Commands::Split {
619 file,
620 output,
621 dialect,
622 verbose,
623 dry_run,
624 progress,
625 tables,
626 schema_only,
627 data_only,
628 fail_fast,
629 json,
630 } => split::run(
631 file,
632 output,
633 dialect,
634 verbose,
635 dry_run,
636 progress,
637 tables,
638 schema_only,
639 data_only,
640 fail_fast,
641 json,
642 ),
643 Commands::Analyze {
644 file,
645 dialect,
646 progress,
647 fail_fast,
648 json,
649 } => analyze::run(file, dialect, progress, fail_fast, json),
650 Commands::Merge {
651 input_dir,
652 output,
653 dialect,
654 tables,
655 exclude,
656 transaction,
657 no_header,
658 progress,
659 dry_run,
660 json,
661 } => merge::run(
662 input_dir,
663 output,
664 dialect,
665 tables,
666 exclude,
667 transaction,
668 no_header,
669 progress,
670 dry_run,
671 json,
672 ),
673 Commands::Sample {
674 file,
675 output,
676 dialect,
677 percent,
678 rows,
679 preserve_relations,
680 tables,
681 exclude,
682 root_tables,
683 include_global,
684 seed,
685 config,
686 max_total_rows,
687 no_limit,
688 strict_fk,
689 no_schema,
690 progress,
691 dry_run,
692 json,
693 } => {
694 let effective_limit = if no_limit || max_total_rows == Some(0) {
695 None
696 } else {
697 max_total_rows
698 };
699 sample::run(
700 file,
701 output,
702 dialect,
703 percent,
704 rows,
705 preserve_relations,
706 tables,
707 exclude,
708 root_tables,
709 include_global,
710 seed,
711 config,
712 effective_limit,
713 strict_fk,
714 no_schema,
715 progress,
716 dry_run,
717 json,
718 )
719 }
720 Commands::Shard {
721 file,
722 output,
723 dialect,
724 tenant_column,
725 tenant_value,
726 tenant_values,
727 root_tables,
728 include_global,
729 config,
730 max_selected_rows,
731 no_limit,
732 strict_fk,
733 no_schema,
734 progress,
735 dry_run,
736 json,
737 } => {
738 let effective_limit = if no_limit || max_selected_rows == Some(0) {
739 None
740 } else {
741 max_selected_rows
742 };
743 shard::run(
744 file,
745 output,
746 dialect,
747 tenant_column,
748 tenant_value,
749 tenant_values,
750 root_tables,
751 include_global,
752 config,
753 effective_limit,
754 strict_fk,
755 no_schema,
756 progress,
757 dry_run,
758 json,
759 )
760 }
761 Commands::Convert {
762 file,
763 output,
764 from,
765 to,
766 strict,
767 progress,
768 dry_run,
769 fail_fast,
770 json,
771 } => convert::run(
772 file, output, from, to, strict, progress, dry_run, fail_fast, json,
773 ),
774 Commands::Validate {
775 file,
776 dialect,
777 progress,
778 strict,
779 json,
780 max_rows_per_table,
781 no_limit,
782 no_fk_checks,
783 fail_fast,
784 } => {
785 let effective_limit = if no_limit || max_rows_per_table == 0 {
786 usize::MAX
787 } else {
788 max_rows_per_table
789 };
790 validate::run(
791 file,
792 dialect,
793 progress,
794 strict,
795 json,
796 effective_limit,
797 no_fk_checks,
798 fail_fast,
799 )
800 }
801 Commands::Diff {
802 old_file,
803 new_file,
804 output,
805 tables,
806 exclude,
807 schema_only,
808 data_only,
809 format,
810 dialect,
811 verbose,
812 progress,
813 max_pk_entries,
814 allow_no_pk,
815 ignore_order,
816 primary_key,
817 ignore_columns,
818 } => diff::run(
819 old_file,
820 new_file,
821 output,
822 tables,
823 exclude,
824 schema_only,
825 data_only,
826 format,
827 dialect,
828 verbose,
829 progress,
830 max_pk_entries,
831 allow_no_pk,
832 ignore_order,
833 primary_key,
834 ignore_columns,
835 ),
836 Commands::Redact {
837 file,
838 output,
839 dialect,
840 config,
841 generate_config,
842 null,
843 hash,
844 fake,
845 mask,
846 constant,
847 seed,
848 locale,
849 tables,
850 exclude,
851 strict,
852 progress,
853 dry_run,
854 json,
855 validate,
856 } => redact::run(
857 file,
858 output,
859 dialect,
860 config,
861 generate_config,
862 null,
863 hash,
864 fake,
865 mask,
866 constant,
867 seed,
868 locale,
869 tables,
870 exclude,
871 strict,
872 progress,
873 dry_run,
874 json,
875 validate,
876 ),
877 Commands::Completions { shell } => {
878 generate(
879 shell,
880 &mut Cli::command(),
881 "sql-splitter",
882 &mut io::stdout(),
883 );
884 Ok(())
885 }
886 }
887}