1mod analyze;
2mod convert;
3mod diff;
4mod glob_util;
5mod merge;
6mod sample;
7mod shard;
8mod split;
9mod validate;
10
11use clap::{CommandFactory, Parser, Subcommand, ValueHint};
12use clap_complete::{generate, Shell};
13use std::io;
14use std::path::PathBuf;
15
// Global help footer appended after the option list of the top-level command.
// The `\x1b[1m` / `\x1b[0m` escapes toggle bold on ANSI-capable terminals.
const AFTER_HELP: &str = "\x1b[1mCommon workflows:\x1b[0m
  Split a dump into per-table files:
    sql-splitter split dump.sql -o tables/

  Create a 10% sample for development:
    sql-splitter sample dump.sql -o dev.sql --percent 10 --preserve-relations

  Convert MySQL to PostgreSQL:
    sql-splitter convert mysql.sql --to postgres -o pg.sql

  Compare two dumps for changes:
    sql-splitter diff old.sql new.sql --format sql -o migration.sql

\x1b[1mMore info:\x1b[0m
  Run 'sql-splitter <command> --help' for command-specific options.
  Documentation: https://github.com/helgesverre/sql-splitter
  Enable completions: sql-splitter completions <shell>";
33
// Top-level argument parser for sql-splitter.
//
// NB: plain `//` comments are used deliberately here — `///` doc comments on
// clap-derive items are harvested as about/help text and would change the
// rendered `--help` output.
#[derive(Parser)]
#[command(name = "sql-splitter")]
#[command(author = "Helge Sverre <helge.sverre@gmail.com>")]
#[command(version)]
#[command(about = "High-performance CLI for splitting, merging, converting, and analyzing SQL dump files")]
#[command(after_help = AFTER_HELP)]
#[command(arg_required_else_help = true)]
#[command(max_term_width = 100)]
pub struct Cli {
    // The selected subcommand; running with no arguments prints help instead
    // (`arg_required_else_help = true` above).
    #[command(subcommand)]
    pub command: Commands,
}
46
// Help-heading labels used to group arguments in `--help` output.
// Sharing these constants keeps section names consistent across subcommands.
const INPUT_OUTPUT: &str = "Input/Output";
const FILTERING: &str = "Filtering";
const MODE: &str = "Mode";
const BEHAVIOR: &str = "Behavior";
const LIMITS: &str = "Limits";
const OUTPUT_FORMAT: &str = "Output";
54
// All sql-splitter subcommands and their arguments.
//
// NB: plain `//` comments are used deliberately throughout — `///` doc
// comments on clap-derive variants and fields become CLI help text, so
// adding them would change the rendered `--help` output.
#[derive(Subcommand)]
pub enum Commands {
    // Split a dump into one file per table.
    #[command(visible_alias = "sp")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter split dump.sql -o tables/
  sql-splitter split dump.sql.gz -o tables/ --tables users,orders
  sql-splitter split dump.sql -o schema/ --schema-only
  sql-splitter split \"backups/*.sql\" -o out/ --fail-fast")]
    Split {
        // Input dump; the examples show .gz files and glob patterns are accepted.
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        // Destination directory for the per-table files.
        #[arg(short, long, default_value = "output", value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
        output: PathBuf,

        // SQL dialect override; None presumably means auto-detect — confirm in split::run.
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        // Comma-separated table filter (per the examples above).
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        // Schema-only and data-only are mutually exclusive.
        #[arg(long, conflicts_with = "data_only", help_heading = FILTERING)]
        schema_only: bool,

        #[arg(long, conflicts_with = "schema_only", help_heading = FILTERING)]
        data_only: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        verbose: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        // Machine-readable output.
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        // Stop on the first failing input (relevant for glob inputs).
        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    // Inspect a dump and report statistics without writing output files.
    #[command(visible_alias = "an")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter analyze dump.sql
  sql-splitter analyze dump.sql.gz --progress
  sql-splitter analyze \"dumps/*.sql\" --json")]
    Analyze {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    // Recombine previously split per-table files into a single dump.
    #[command(visible_alias = "mg")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter merge tables/ -o restored.sql
  sql-splitter merge tables/ -o restored.sql --transaction
  sql-splitter merge tables/ -o partial.sql --tables users,orders
  sql-splitter merge tables/ -o clean.sql --exclude logs,cache")]
    Merge {
        // Directory containing the per-table SQL files (positional).
        #[arg(value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
        input_dir: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        // NOTE(review): unlike every other command, this dialect has a
        // default_value, so the Option is always Some("mysql") after parsing —
        // confirm auto-detection is intentionally bypassed for merge.
        #[arg(short, long, default_value = "mysql", help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        // Wrap the merged output in a transaction.
        #[arg(long, help_heading = BEHAVIOR)]
        transaction: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_header: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    // Produce a reduced sample of a dump (by percent or row count).
    #[command(visible_alias = "sa")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter sample dump.sql -o dev.sql --percent 10
  sql-splitter sample dump.sql -o dev.sql --rows 1000 --preserve-relations
  sql-splitter sample dump.sql -o dev.sql --percent 5 --seed 42
  sql-splitter sample dump.sql -o dev.sql --tables users,orders --percent 20")]
    Sample {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        // Optional config file driving the sampling rules.
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        // Exactly one sampling mode may be chosen: --percent or --rows.
        #[arg(long, conflicts_with = "rows", help_heading = MODE)]
        percent: Option<u32>,

        #[arg(long, conflicts_with = "percent", help_heading = MODE)]
        rows: Option<usize>,

        // RNG seed for reproducible samples.
        #[arg(long, help_heading = MODE)]
        seed: Option<u64>,

        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        #[arg(long, help_heading = FILTERING)]
        root_tables: Option<String>,

        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
        include_global: Option<String>,

        // Follow foreign keys so sampled rows reference existing parents.
        #[arg(long, help_heading = BEHAVIOR)]
        preserve_relations: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        strict_fk: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_schema: bool,

        // Overall row cap; 0 or --no-limit disables it (see run()).
        #[arg(long, help_heading = LIMITS)]
        max_total_rows: Option<usize>,

        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    // Extract one tenant's (or several tenants') rows from a multi-tenant dump.
    #[command(visible_alias = "sh")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter shard dump.sql -o tenant.sql --tenant-value 123
  sql-splitter shard dump.sql -o tenant.sql --tenant-column company_id --tenant-value 42
  sql-splitter shard dump.sql -o shards/ --tenant-values \"1,2,3\"")]
    Shard {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        // Column holding the tenant discriminator.
        #[arg(long, help_heading = MODE)]
        tenant_column: Option<String>,

        // Single tenant vs. comma-separated list — mutually exclusive.
        #[arg(long, conflicts_with = "tenant_values", help_heading = MODE)]
        tenant_value: Option<String>,

        #[arg(long, conflicts_with = "tenant_value", help_heading = MODE)]
        tenant_values: Option<String>,

        #[arg(long, help_heading = FILTERING)]
        root_tables: Option<String>,

        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
        include_global: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        strict_fk: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_schema: bool,

        // Row cap on selected rows; 0 or --no-limit disables it (see run()).
        #[arg(long, help_heading = LIMITS)]
        max_selected_rows: Option<usize>,

        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    // Translate a dump from one SQL dialect to another.
    #[command(visible_alias = "cv")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter convert mysql.sql --to postgres -o pg.sql
  sql-splitter convert pg_dump.sql --to mysql -o mysql.sql
  sql-splitter convert dump.sql --from mysql --to sqlite -o sqlite.sql
  sql-splitter convert mysql.sql --to postgres | psql mydb")]
    Convert {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        // Omitted output writes to stdout (see the pipe example above).
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        // Source dialect; None presumably means auto-detect — confirm in convert::run.
        #[arg(long, help_heading = MODE)]
        from: Option<String>,

        // Target dialect (required).
        #[arg(long, help_heading = MODE)]
        to: String,

        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    // Check a dump for structural problems (and optionally FK integrity).
    #[command(visible_alias = "val")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter validate dump.sql
  sql-splitter validate dump.sql --strict
  sql-splitter validate \"dumps/*.sql\" --json --fail-fast
  sql-splitter validate dump.sql --no-fk-checks")]
    Validate {
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        no_fk_checks: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,

        // Per-table row cap; 0 or --no-limit maps to "unlimited" (see run()).
        #[arg(long, default_value = "1000000", help_heading = LIMITS)]
        max_rows_per_table: usize,

        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,
    },

    // Compare two dumps and report (or emit) the differences.
    #[command(visible_alias = "df")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter diff old.sql new.sql
  sql-splitter diff old.sql new.sql --schema-only
  sql-splitter diff old.sql new.sql --format sql -o migration.sql
  sql-splitter diff old.sql new.sql --verbose --ignore-columns \"*.updated_at\"
  sql-splitter diff old.sql new.sql --primary-key logs:timestamp+message")]
    Diff {
        // Two positional inputs: baseline first, updated second.
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        old_file: PathBuf,

        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        new_file: PathBuf,

        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        // Column patterns to ignore, e.g. "*.updated_at".
        #[arg(long, help_heading = FILTERING)]
        ignore_columns: Option<String>,

        #[arg(long, conflicts_with = "data_only", help_heading = MODE)]
        schema_only: bool,

        #[arg(long, conflicts_with = "schema_only", help_heading = MODE)]
        data_only: bool,

        // Per-table PK override, e.g. "logs:timestamp+message".
        #[arg(long, help_heading = MODE)]
        primary_key: Option<String>,

        #[arg(long, help_heading = BEHAVIOR)]
        allow_no_pk: bool,

        #[arg(long, help_heading = BEHAVIOR)]
        ignore_order: bool,

        #[arg(long, default_value = "10000000", help_heading = LIMITS)]
        max_pk_entries: usize,

        // Output format; defaults to "text", "sql" emits a migration script.
        #[arg(short, long, default_value = "text", help_heading = OUTPUT_FORMAT)]
        format: Option<String>,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        verbose: bool,

        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,
    },

    // Emit a shell completion script to stdout.
    #[command(after_help = "\x1b[1mInstallation:\x1b[0m
  Bash:
    sql-splitter completions bash > /etc/bash_completion.d/sql-splitter
    # or: sql-splitter completions bash >> ~/.bashrc

  Zsh:
    sql-splitter completions zsh > \"${fpath[1]}/_sql-splitter\"
    # or for oh-my-zsh: sql-splitter completions zsh > ~/.oh-my-zsh/completions/_sql-splitter

  Fish:
    sql-splitter completions fish > ~/.config/fish/completions/sql-splitter.fish

  PowerShell:
    sql-splitter completions powershell >> $PROFILE")]
    Completions {
        // Target shell, validated against clap_complete's Shell value-enum.
        #[arg(value_enum)]
        shell: Shell,
    },
}
529
530pub fn run(cli: Cli) -> anyhow::Result<()> {
531 match cli.command {
532 Commands::Split {
533 file,
534 output,
535 dialect,
536 verbose,
537 dry_run,
538 progress,
539 tables,
540 schema_only,
541 data_only,
542 fail_fast,
543 json,
544 } => split::run(
545 file,
546 output,
547 dialect,
548 verbose,
549 dry_run,
550 progress,
551 tables,
552 schema_only,
553 data_only,
554 fail_fast,
555 json,
556 ),
557 Commands::Analyze {
558 file,
559 dialect,
560 progress,
561 fail_fast,
562 json,
563 } => analyze::run(file, dialect, progress, fail_fast, json),
564 Commands::Merge {
565 input_dir,
566 output,
567 dialect,
568 tables,
569 exclude,
570 transaction,
571 no_header,
572 progress,
573 dry_run,
574 json,
575 } => merge::run(
576 input_dir,
577 output,
578 dialect,
579 tables,
580 exclude,
581 transaction,
582 no_header,
583 progress,
584 dry_run,
585 json,
586 ),
587 Commands::Sample {
588 file,
589 output,
590 dialect,
591 percent,
592 rows,
593 preserve_relations,
594 tables,
595 exclude,
596 root_tables,
597 include_global,
598 seed,
599 config,
600 max_total_rows,
601 no_limit,
602 strict_fk,
603 no_schema,
604 progress,
605 dry_run,
606 json,
607 } => {
608 let effective_limit = if no_limit || max_total_rows == Some(0) {
609 None
610 } else {
611 max_total_rows
612 };
613 sample::run(
614 file,
615 output,
616 dialect,
617 percent,
618 rows,
619 preserve_relations,
620 tables,
621 exclude,
622 root_tables,
623 include_global,
624 seed,
625 config,
626 effective_limit,
627 strict_fk,
628 no_schema,
629 progress,
630 dry_run,
631 json,
632 )
633 }
634 Commands::Shard {
635 file,
636 output,
637 dialect,
638 tenant_column,
639 tenant_value,
640 tenant_values,
641 root_tables,
642 include_global,
643 config,
644 max_selected_rows,
645 no_limit,
646 strict_fk,
647 no_schema,
648 progress,
649 dry_run,
650 json,
651 } => {
652 let effective_limit = if no_limit || max_selected_rows == Some(0) {
653 None
654 } else {
655 max_selected_rows
656 };
657 shard::run(
658 file,
659 output,
660 dialect,
661 tenant_column,
662 tenant_value,
663 tenant_values,
664 root_tables,
665 include_global,
666 config,
667 effective_limit,
668 strict_fk,
669 no_schema,
670 progress,
671 dry_run,
672 json,
673 )
674 }
675 Commands::Convert {
676 file,
677 output,
678 from,
679 to,
680 strict,
681 progress,
682 dry_run,
683 fail_fast,
684 json,
685 } => convert::run(
686 file, output, from, to, strict, progress, dry_run, fail_fast, json,
687 ),
688 Commands::Validate {
689 file,
690 dialect,
691 progress,
692 strict,
693 json,
694 max_rows_per_table,
695 no_limit,
696 no_fk_checks,
697 fail_fast,
698 } => {
699 let effective_limit = if no_limit || max_rows_per_table == 0 {
700 usize::MAX
701 } else {
702 max_rows_per_table
703 };
704 validate::run(
705 file,
706 dialect,
707 progress,
708 strict,
709 json,
710 effective_limit,
711 no_fk_checks,
712 fail_fast,
713 )
714 }
715 Commands::Diff {
716 old_file,
717 new_file,
718 output,
719 tables,
720 exclude,
721 schema_only,
722 data_only,
723 format,
724 dialect,
725 verbose,
726 progress,
727 max_pk_entries,
728 allow_no_pk,
729 ignore_order,
730 primary_key,
731 ignore_columns,
732 } => diff::run(
733 old_file,
734 new_file,
735 output,
736 tables,
737 exclude,
738 schema_only,
739 data_only,
740 format,
741 dialect,
742 verbose,
743 progress,
744 max_pk_entries,
745 allow_no_pk,
746 ignore_order,
747 primary_key,
748 ignore_columns,
749 ),
750 Commands::Completions { shell } => {
751 generate(
752 shell,
753 &mut Cli::command(),
754 "sql-splitter",
755 &mut io::stdout(),
756 );
757 Ok(())
758 }
759 }
760}