sql_splitter/cmd/
mod.rs

1pub(crate) mod analyze;
2pub(crate) mod convert;
3mod diff;
4mod glob_util;
5pub(crate) mod graph;
6pub(crate) mod merge;
7mod order;
8#[cfg(feature = "duckdb-query")]
9mod query;
10pub(crate) mod redact;
11pub(crate) mod sample;
12pub(crate) mod shard;
13pub(crate) mod split;
14pub(crate) mod validate;
15
16use clap::{CommandFactory, Parser, Subcommand, ValueHint};
17use clap_complete::{generate, Shell};
18use std::io;
19use std::path::PathBuf;
20
21const AFTER_HELP: &str = "\x1b[1mCommon workflows:\x1b[0m
22  Split a dump into per-table files:
23    sql-splitter split dump.sql -o tables/
24
25  Create a 10% sample for development:
26    sql-splitter sample dump.sql -o dev.sql --percent 10 --preserve-relations
27
28  Convert MySQL to PostgreSQL:
29    sql-splitter convert mysql.sql --to postgres -o pg.sql
30
31  Compare two dumps for changes:
32    sql-splitter diff old.sql new.sql --format sql -o migration.sql
33
34\x1b[1mMore info:\x1b[0m
35  Run 'sql-splitter <command> --help' for command-specific options.
36  Documentation: https://github.com/helgesverre/sql-splitter
37  Enable completions: sql-splitter completions <shell>";
38
39#[derive(Parser)]
40#[command(name = "sql-splitter")]
41#[command(author = "Helge Sverre <helge.sverre@gmail.com>")]
42#[command(version)]
43#[command(
44    about = "High-performance CLI for splitting, merging, converting, and analyzing SQL dump files"
45)]
46#[command(after_help = AFTER_HELP)]
47#[command(arg_required_else_help = true)]
48#[command(max_term_width = 100)]
49pub struct Cli {
50    #[command(subcommand)]
51    pub command: Commands,
52}
53
54// Help heading constants for consistency
55const INPUT_OUTPUT: &str = "Input/Output";
56const FILTERING: &str = "Filtering";
57const MODE: &str = "Mode";
58const BEHAVIOR: &str = "Behavior";
59const LIMITS: &str = "Limits";
60const OUTPUT_FORMAT: &str = "Output";
61
62#[derive(Subcommand)]
63pub enum Commands {
64    /// Split a SQL dump into individual table files
65    #[command(visible_alias = "sp")]
66    #[command(after_help = "\x1b[1mExamples:\x1b[0m
67  sql-splitter split dump.sql -o tables/
68  sql-splitter split dump.sql.gz -o tables/ --tables users,orders
69  sql-splitter split dump.sql -o schema/ --schema-only
70  sql-splitter split \"backups/*.sql\" -o out/ --fail-fast")]
71    Split {
72        /// Input SQL file or glob pattern (e.g., *.sql, dumps/**/*.sql)
73        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
74        file: PathBuf,
75
76        /// Output directory for split files
77        #[arg(short, long, default_value = "output", value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
78        output: PathBuf,
79
80        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
81        #[arg(short, long, help_heading = INPUT_OUTPUT)]
82        dialect: Option<String>,
83
84        /// Only split specific tables (comma-separated)
85        #[arg(short, long, help_heading = FILTERING)]
86        tables: Option<String>,
87
88        /// Only include schema statements (CREATE, ALTER, DROP)
89        #[arg(long, conflicts_with = "data_only", help_heading = FILTERING)]
90        schema_only: bool,
91
92        /// Only include data statements (INSERT, COPY)
93        #[arg(long, conflicts_with = "schema_only", help_heading = FILTERING)]
94        data_only: bool,
95
96        /// Show verbose output
97        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
98        verbose: bool,
99
100        /// Show progress bar
101        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
102        progress: bool,
103
104        /// Output results as JSON
105        #[arg(long, help_heading = OUTPUT_FORMAT)]
106        json: bool,
107
108        /// Preview without writing files
109        #[arg(long, help_heading = BEHAVIOR)]
110        dry_run: bool,
111
112        /// Stop on first error (for glob patterns)
113        #[arg(long, help_heading = BEHAVIOR)]
114        fail_fast: bool,
115    },
116
117    /// Analyze a SQL dump and display table statistics
118    #[command(visible_alias = "an")]
119    #[command(after_help = "\x1b[1mExamples:\x1b[0m
120  sql-splitter analyze dump.sql
121  sql-splitter analyze dump.sql.gz --progress
122  sql-splitter analyze \"dumps/*.sql\" --json")]
123    Analyze {
124        /// Input SQL file or glob pattern
125        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
126        file: PathBuf,
127
128        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
129        #[arg(short, long, help_heading = INPUT_OUTPUT)]
130        dialect: Option<String>,
131
132        /// Show progress bar
133        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
134        progress: bool,
135
136        /// Output results as JSON
137        #[arg(long, help_heading = OUTPUT_FORMAT)]
138        json: bool,
139
140        /// Stop on first error (for glob patterns)
141        #[arg(long, help_heading = BEHAVIOR)]
142        fail_fast: bool,
143    },
144
145    /// Merge split SQL files back into a single dump
146    #[command(visible_alias = "mg")]
147    #[command(after_help = "\x1b[1mExamples:\x1b[0m
148  sql-splitter merge tables/ -o restored.sql
149  sql-splitter merge tables/ -o restored.sql --transaction
150  sql-splitter merge tables/ -o partial.sql --tables users,orders
151  sql-splitter merge tables/ -o clean.sql --exclude logs,cache")]
152    Merge {
153        /// Directory containing split SQL files
154        #[arg(value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
155        input_dir: PathBuf,
156
157        /// Output SQL file (default: stdout)
158        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
159        output: Option<PathBuf>,
160
161        /// SQL dialect for output formatting
162        #[arg(short, long, default_value = "mysql", help_heading = INPUT_OUTPUT)]
163        dialect: Option<String>,
164
165        /// Only merge specific tables (comma-separated)
166        #[arg(short, long, help_heading = FILTERING)]
167        tables: Option<String>,
168
169        /// Exclude specific tables (comma-separated)
170        #[arg(short, long, help_heading = FILTERING)]
171        exclude: Option<String>,
172
173        /// Wrap output in BEGIN/COMMIT transaction
174        #[arg(long, help_heading = BEHAVIOR)]
175        transaction: bool,
176
177        /// Omit header comments
178        #[arg(long, help_heading = BEHAVIOR)]
179        no_header: bool,
180
181        /// Show progress bar
182        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
183        progress: bool,
184
185        /// Output results as JSON
186        #[arg(long, help_heading = OUTPUT_FORMAT)]
187        json: bool,
188
189        /// Preview without writing files
190        #[arg(long, help_heading = BEHAVIOR)]
191        dry_run: bool,
192    },
193
194    /// Create a reduced dataset preserving FK relationships
195    #[command(visible_alias = "sa")]
196    #[command(after_help = "\x1b[1mExamples:\x1b[0m
197  sql-splitter sample dump.sql -o dev.sql --percent 10
198  sql-splitter sample dump.sql -o dev.sql --rows 1000 --preserve-relations
199  sql-splitter sample dump.sql -o dev.sql --percent 5 --seed 42
200  sql-splitter sample dump.sql -o dev.sql --tables users,orders --percent 20")]
201    Sample {
202        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
203        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
204        file: PathBuf,
205
206        /// Output SQL file (default: stdout)
207        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
208        output: Option<PathBuf>,
209
210        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
211        #[arg(short, long, help_heading = INPUT_OUTPUT)]
212        dialect: Option<String>,
213
214        /// YAML config file for per-table settings
215        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
216        config: Option<PathBuf>,
217
218        /// Sample percentage of rows (1-100)
219        #[arg(long, conflicts_with = "rows", help_heading = MODE)]
220        percent: Option<u32>,
221
222        /// Sample fixed number of rows per table
223        #[arg(long, conflicts_with = "percent", help_heading = MODE)]
224        rows: Option<usize>,
225
226        /// Random seed for reproducible sampling
227        #[arg(long, help_heading = MODE)]
228        seed: Option<u64>,
229
230        /// Only sample specific tables (comma-separated)
231        #[arg(short, long, help_heading = FILTERING)]
232        tables: Option<String>,
233
234        /// Exclude specific tables (comma-separated)
235        #[arg(short, long, help_heading = FILTERING)]
236        exclude: Option<String>,
237
238        /// Tables to start sampling from (comma-separated)
239        #[arg(long, help_heading = FILTERING)]
240        root_tables: Option<String>,
241
242        /// Handle lookup tables: none, lookups, all
243        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
244        include_global: Option<String>,
245
246        /// Maintain FK integrity by including referenced rows
247        #[arg(long, help_heading = BEHAVIOR)]
248        preserve_relations: bool,
249
250        /// Fail on FK integrity violations
251        #[arg(long, help_heading = BEHAVIOR)]
252        strict_fk: bool,
253
254        /// Exclude CREATE TABLE statements from output
255        #[arg(long, help_heading = BEHAVIOR)]
256        no_schema: bool,
257
258        /// Max total rows to sample (0 = unlimited)
259        #[arg(long, help_heading = LIMITS)]
260        max_total_rows: Option<usize>,
261
262        /// Disable row limit
263        #[arg(long, help_heading = LIMITS)]
264        no_limit: bool,
265
266        /// Show progress bar
267        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
268        progress: bool,
269
270        /// Output results as JSON
271        #[arg(long, help_heading = OUTPUT_FORMAT)]
272        json: bool,
273
274        /// Preview without writing files
275        #[arg(long, help_heading = BEHAVIOR)]
276        dry_run: bool,
277    },
278
279    /// Extract tenant-specific data from a multi-tenant dump
280    #[command(visible_alias = "sh")]
281    #[command(after_help = "\x1b[1mExamples:\x1b[0m
282  sql-splitter shard dump.sql -o tenant.sql --tenant-value 123
283  sql-splitter shard dump.sql -o tenant.sql --tenant-column company_id --tenant-value 42
284  sql-splitter shard dump.sql -o shards/ --tenant-values \"1,2,3\"")]
285    Shard {
286        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
287        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
288        file: PathBuf,
289
290        /// Output SQL file or directory (default: stdout)
291        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
292        output: Option<PathBuf>,
293
294        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
295        #[arg(short, long, help_heading = INPUT_OUTPUT)]
296        dialect: Option<String>,
297
298        /// YAML config file for table classification
299        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
300        config: Option<PathBuf>,
301
302        /// Column containing tenant ID (auto-detected if omitted)
303        #[arg(long, help_heading = MODE)]
304        tenant_column: Option<String>,
305
306        /// Single tenant value to extract
307        #[arg(long, conflicts_with = "tenant_values", help_heading = MODE)]
308        tenant_value: Option<String>,
309
310        /// Multiple tenant values (comma-separated, outputs to directory)
311        #[arg(long, conflicts_with = "tenant_value", help_heading = MODE)]
312        tenant_values: Option<String>,
313
314        /// Tables containing tenant column (comma-separated)
315        #[arg(long, help_heading = FILTERING)]
316        root_tables: Option<String>,
317
318        /// Handle lookup tables: none, lookups, all
319        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
320        include_global: Option<String>,
321
322        /// Fail on FK integrity violations
323        #[arg(long, help_heading = BEHAVIOR)]
324        strict_fk: bool,
325
326        /// Exclude CREATE TABLE statements from output
327        #[arg(long, help_heading = BEHAVIOR)]
328        no_schema: bool,
329
330        /// Max rows to select (0 = unlimited)
331        #[arg(long, help_heading = LIMITS)]
332        max_selected_rows: Option<usize>,
333
334        /// Disable row limit
335        #[arg(long, help_heading = LIMITS)]
336        no_limit: bool,
337
338        /// Show progress bar
339        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
340        progress: bool,
341
342        /// Output results as JSON
343        #[arg(long, help_heading = OUTPUT_FORMAT)]
344        json: bool,
345
346        /// Preview without writing files
347        #[arg(long, help_heading = BEHAVIOR)]
348        dry_run: bool,
349    },
350
351    /// Convert a SQL dump between MySQL, PostgreSQL, and SQLite
352    #[command(visible_alias = "cv")]
353    #[command(after_help = "\x1b[1mExamples:\x1b[0m
354  sql-splitter convert mysql.sql --to postgres -o pg.sql
355  sql-splitter convert pg_dump.sql --to mysql -o mysql.sql
356  sql-splitter convert dump.sql --from mysql --to sqlite -o sqlite.sql
357  sql-splitter convert mysql.sql --to postgres | psql mydb")]
358    Convert {
359        /// Input SQL file or glob pattern
360        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
361        file: PathBuf,
362
363        /// Output SQL file or directory (default: stdout)
364        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
365        output: Option<PathBuf>,
366
367        /// Source dialect (auto-detected if omitted)
368        #[arg(long, help_heading = MODE)]
369        from: Option<String>,
370
371        /// Target dialect (required)
372        #[arg(long, help_heading = MODE)]
373        to: String,
374
375        /// Fail on unsupported features instead of warning
376        #[arg(long, help_heading = BEHAVIOR)]
377        strict: bool,
378
379        /// Show progress bar
380        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
381        progress: bool,
382
383        /// Output results as JSON
384        #[arg(long, help_heading = OUTPUT_FORMAT)]
385        json: bool,
386
387        /// Preview without writing files
388        #[arg(long, help_heading = BEHAVIOR)]
389        dry_run: bool,
390
391        /// Stop on first error (for glob patterns)
392        #[arg(long, help_heading = BEHAVIOR)]
393        fail_fast: bool,
394    },
395
396    /// Validate SQL dump syntax, encoding, and data integrity
397    #[command(visible_alias = "val")]
398    #[command(after_help = "\x1b[1mExamples:\x1b[0m
399  sql-splitter validate dump.sql
400  sql-splitter validate dump.sql --strict
401  sql-splitter validate \"dumps/*.sql\" --json --fail-fast
402  sql-splitter validate dump.sql --no-fk-checks")]
403    Validate {
404        /// Input SQL file or glob pattern
405        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
406        file: PathBuf,
407
408        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
409        #[arg(short, long, help_heading = INPUT_OUTPUT)]
410        dialect: Option<String>,
411
412        /// Treat warnings as errors (exit code 1)
413        #[arg(long, help_heading = BEHAVIOR)]
414        strict: bool,
415
416        /// Skip PK/FK data integrity checks
417        #[arg(long, help_heading = BEHAVIOR)]
418        no_fk_checks: bool,
419
420        /// Stop on first error (for glob patterns)
421        #[arg(long, help_heading = BEHAVIOR)]
422        fail_fast: bool,
423
424        /// Max rows per table for PK/FK checks (0 = unlimited)
425        #[arg(long, default_value = "1000000", help_heading = LIMITS)]
426        max_rows_per_table: usize,
427
428        /// Disable row limit for PK/FK checks
429        #[arg(long, help_heading = LIMITS)]
430        no_limit: bool,
431
432        /// Show progress bar
433        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
434        progress: bool,
435
436        /// Output results as JSON
437        #[arg(long, help_heading = OUTPUT_FORMAT)]
438        json: bool,
439    },
440
441    /// Compare two SQL dumps and report schema + data differences
442    #[command(visible_alias = "df")]
443    #[command(after_help = "\x1b[1mExamples:\x1b[0m
444  sql-splitter diff old.sql new.sql
445  sql-splitter diff old.sql new.sql --schema-only
446  sql-splitter diff old.sql new.sql --format sql -o migration.sql
447  sql-splitter diff old.sql new.sql --verbose --ignore-columns \"*.updated_at\"
448  sql-splitter diff old.sql new.sql --primary-key logs:timestamp+message")]
449    Diff {
450        /// Original SQL dump file
451        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
452        old_file: PathBuf,
453
454        /// Updated SQL dump file
455        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
456        new_file: PathBuf,
457
458        /// Output file (default: stdout)
459        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
460        output: Option<PathBuf>,
461
462        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
463        #[arg(short, long, help_heading = INPUT_OUTPUT)]
464        dialect: Option<String>,
465
466        /// Only compare these tables (comma-separated)
467        #[arg(short, long, help_heading = FILTERING)]
468        tables: Option<String>,
469
470        /// Exclude these tables (comma-separated)
471        #[arg(short, long, help_heading = FILTERING)]
472        exclude: Option<String>,
473
474        /// Ignore columns matching glob patterns (e.g., *.updated_at)
475        #[arg(long, help_heading = FILTERING)]
476        ignore_columns: Option<String>,
477
478        /// Compare schema only, skip data
479        #[arg(long, conflicts_with = "data_only", help_heading = MODE)]
480        schema_only: bool,
481
482        /// Compare data only, skip schema
483        #[arg(long, conflicts_with = "schema_only", help_heading = MODE)]
484        data_only: bool,
485
486        /// Override primary key (format: table:col1+col2,table2:col)
487        #[arg(long, help_heading = MODE)]
488        primary_key: Option<String>,
489
490        /// Compare tables without PK using all columns as key
491        #[arg(long, help_heading = BEHAVIOR)]
492        allow_no_pk: bool,
493
494        /// Ignore column order differences in schema
495        #[arg(long, help_heading = BEHAVIOR)]
496        ignore_order: bool,
497
498        /// Max PK entries per table (limits memory)
499        #[arg(long, default_value = "10000000", help_heading = LIMITS)]
500        max_pk_entries: usize,
501
502        /// Output format: text, json, sql
503        #[arg(short, long, default_value = "text", help_heading = OUTPUT_FORMAT)]
504        format: Option<String>,
505
506        /// Show sample PK values for changes
507        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
508        verbose: bool,
509
510        /// Show progress bar
511        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
512        progress: bool,
513    },
514
515    /// Redact sensitive data (PII) from SQL dumps
516    #[command(visible_alias = "rd")]
517    #[command(after_help = "\x1b[1mExamples:\x1b[0m
518  sql-splitter redact dump.sql -o safe.sql --config redact.yaml
519  sql-splitter redact dump.sql -o safe.sql --null \"*.ssn\" --hash \"*.email\"
520  sql-splitter redact dump.sql --generate-config -o redact.yaml
521  sql-splitter redact dump.sql -o safe.sql --config redact.yaml --seed 42")]
522    Redact {
523        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
524        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
525        file: PathBuf,
526
527        /// Output file (default: stdout)
528        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
529        output: Option<PathBuf>,
530
531        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
532        #[arg(short, long, help_heading = INPUT_OUTPUT)]
533        dialect: Option<String>,
534
535        /// YAML config file for redaction rules
536        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
537        config: Option<PathBuf>,
538
539        /// Generate annotated YAML config by analyzing input file
540        #[arg(long, help_heading = MODE)]
541        generate_config: bool,
542
543        /// Columns to set to NULL (glob patterns, comma-separated)
544        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
545        null: Vec<String>,
546
547        /// Columns to hash with SHA256 (glob patterns)
548        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
549        hash: Vec<String>,
550
551        /// Columns to replace with fake data (glob patterns)
552        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
553        fake: Vec<String>,
554
555        /// Columns to mask (format: pattern=column, e.g., "****-XXXX=*.credit_card")
556        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
557        mask: Vec<String>,
558
559        /// Column=value pairs for constant replacement
560        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
561        constant: Vec<String>,
562
563        /// Random seed for reproducible redaction
564        #[arg(long, help_heading = MODE)]
565        seed: Option<u64>,
566
567        /// Locale for fake data generation (default: en)
568        #[arg(long, default_value = "en", help_heading = MODE)]
569        locale: String,
570
571        /// Only redact specific tables (comma-separated)
572        #[arg(short, long, value_delimiter = ',', help_heading = FILTERING)]
573        tables: Vec<String>,
574
575        /// Exclude specific tables (comma-separated)
576        #[arg(short = 'x', long, value_delimiter = ',', help_heading = FILTERING)]
577        exclude: Vec<String>,
578
579        /// Fail on warnings (e.g., unsupported locale)
580        #[arg(long, help_heading = BEHAVIOR)]
581        strict: bool,
582
583        /// Show progress bar
584        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
585        progress: bool,
586
587        /// Preview without writing files
588        #[arg(long, help_heading = BEHAVIOR)]
589        dry_run: bool,
590
591        /// Output results as JSON
592        #[arg(long, help_heading = OUTPUT_FORMAT)]
593        json: bool,
594
595        /// Validate config only, don't process
596        #[arg(long, help_heading = BEHAVIOR)]
597        validate: bool,
598    },
599
600    /// Generate Entity Relationship Diagram (ERD) from SQL dump
601    #[command(visible_alias = "gr")]
602    #[command(after_help = "\x1b[1mExamples:\x1b[0m
603  sql-splitter graph dump.sql -o schema.html
604  sql-splitter graph dump.sql -o schema.mmd --format mermaid
605  sql-splitter graph dump.sql -o schema.png --render
606  sql-splitter graph dump.sql --cycles-only
607  sql-splitter graph dump.sql --table users --transitive")]
608    Graph {
609        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
610        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
611        file: PathBuf,
612
613        /// Output file (.html, .dot, .mmd, .json, .png, .svg)
614        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
615        output: Option<PathBuf>,
616
617        /// Output format: html, dot, mermaid, json
618        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
619        format: Option<String>,
620
621        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
622        #[arg(short, long, help_heading = INPUT_OUTPUT)]
623        dialect: Option<String>,
624
625        /// Layout direction: lr (left-right), tb (top-bottom)
626        #[arg(long, default_value = "lr", help_heading = OUTPUT_FORMAT)]
627        layout: Option<String>,
628
629        /// Show only tables involved in circular dependencies
630        #[arg(long, help_heading = FILTERING)]
631        cycles_only: bool,
632
633        /// Focus on a specific table
634        #[arg(long, help_heading = FILTERING)]
635        table: Option<String>,
636
637        /// Show transitive dependencies (parents of parents)
638        #[arg(long, help_heading = FILTERING)]
639        transitive: bool,
640
641        /// Show reverse dependencies (who references this table)
642        #[arg(long, help_heading = FILTERING)]
643        reverse: bool,
644
645        /// Only include these tables (comma-separated, supports globs)
646        #[arg(short, long, help_heading = FILTERING)]
647        tables: Option<String>,
648
649        /// Exclude these tables (comma-separated, supports globs)
650        #[arg(short, long, help_heading = FILTERING)]
651        exclude: Option<String>,
652
653        /// Maximum depth for transitive dependencies
654        #[arg(long, help_heading = FILTERING)]
655        max_depth: Option<usize>,
656
657        /// Render DOT to PNG/SVG using Graphviz
658        #[arg(long, help_heading = BEHAVIOR)]
659        render: bool,
660
661        /// Show progress bar
662        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
663        progress: bool,
664
665        /// Output results as JSON
666        #[arg(long, help_heading = OUTPUT_FORMAT)]
667        json: bool,
668    },
669
670    /// Output SQL dump with tables in topological FK order
671    #[command(visible_alias = "ord")]
672    #[command(after_help = "\x1b[1mExamples:\x1b[0m
673  sql-splitter order dump.sql -o ordered.sql
674  sql-splitter order dump.sql --check
675  sql-splitter order dump.sql --dry-run
676  sql-splitter order dump.sql --reverse")]
677    Order {
678        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
679        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
680        file: PathBuf,
681
682        /// Output file (default: stdout)
683        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
684        output: Option<PathBuf>,
685
686        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
687        #[arg(short, long, help_heading = INPUT_OUTPUT)]
688        dialect: Option<String>,
689
690        /// Verify ordering without writing output
691        #[arg(long, help_heading = BEHAVIOR)]
692        check: bool,
693
694        /// Show order without rewriting the file
695        #[arg(long, help_heading = BEHAVIOR)]
696        dry_run: bool,
697
698        /// Order children before parents (for DROP operations)
699        #[arg(long, help_heading = BEHAVIOR)]
700        reverse: bool,
701    },
702
703    /// Query SQL dumps using DuckDB's analytical engine
704    #[cfg(feature = "duckdb-query")]
705    #[command(visible_alias = "qy")]
706    #[command(after_help = "\x1b[1mExamples:\x1b[0m
707  sql-splitter query dump.sql \"SELECT COUNT(*) FROM users\"
708  sql-splitter query dump.sql \"SELECT * FROM orders WHERE total > 100\" -f json
709  sql-splitter query dump.sql \"SELECT * FROM users LIMIT 10\" -o results.csv -f csv
710  sql-splitter query dump.sql --interactive
711  sql-splitter query huge.sql \"SELECT ...\" --disk
712  sql-splitter query dump.sql \"SELECT ...\" --cache
713  sql-splitter query --list-cache")]
714    Query(query::QueryArgs),
715
716    /// Generate JSON schemas for --json output types (developer tool)
717    #[command(hide = true)]
718    Schema {
719        /// Output directory for schema files
720        #[arg(short, long, default_value = "schemas", value_hint = ValueHint::DirPath)]
721        output: PathBuf,
722
723        /// Generate schema for a specific command only
724        #[arg(short, long)]
725        command: Option<String>,
726
727        /// Print schemas to stdout instead of writing files
728        #[arg(long)]
729        stdout: bool,
730
731        /// List available schema names
732        #[arg(long)]
733        list: bool,
734    },
735
736    /// Generate shell completion scripts
737    #[command(after_help = "\x1b[1mInstallation:\x1b[0m
738  Bash:
739    sql-splitter completions bash > /etc/bash_completion.d/sql-splitter
740    # or: sql-splitter completions bash >> ~/.bashrc
741
742  Zsh:
743    sql-splitter completions zsh > \"${fpath[1]}/_sql-splitter\"
744    # or for oh-my-zsh: sql-splitter completions zsh > ~/.oh-my-zsh/completions/_sql-splitter
745
746  Fish:
747    sql-splitter completions fish > ~/.config/fish/completions/sql-splitter.fish
748
749  PowerShell:
750    sql-splitter completions powershell >> $PROFILE")]
751    Completions {
752        /// Target shell
753        #[arg(value_enum)]
754        shell: Shell,
755    },
756}
757
758pub fn run(cli: Cli) -> anyhow::Result<()> {
759    match cli.command {
760        Commands::Split {
761            file,
762            output,
763            dialect,
764            verbose,
765            dry_run,
766            progress,
767            tables,
768            schema_only,
769            data_only,
770            fail_fast,
771            json,
772        } => split::run(
773            file,
774            output,
775            dialect,
776            verbose,
777            dry_run,
778            progress,
779            tables,
780            schema_only,
781            data_only,
782            fail_fast,
783            json,
784        ),
785        Commands::Analyze {
786            file,
787            dialect,
788            progress,
789            fail_fast,
790            json,
791        } => analyze::run(file, dialect, progress, fail_fast, json),
792        Commands::Merge {
793            input_dir,
794            output,
795            dialect,
796            tables,
797            exclude,
798            transaction,
799            no_header,
800            progress,
801            dry_run,
802            json,
803        } => merge::run(
804            input_dir,
805            output,
806            dialect,
807            tables,
808            exclude,
809            transaction,
810            no_header,
811            progress,
812            dry_run,
813            json,
814        ),
815        Commands::Sample {
816            file,
817            output,
818            dialect,
819            percent,
820            rows,
821            preserve_relations,
822            tables,
823            exclude,
824            root_tables,
825            include_global,
826            seed,
827            config,
828            max_total_rows,
829            no_limit,
830            strict_fk,
831            no_schema,
832            progress,
833            dry_run,
834            json,
835        } => {
836            let effective_limit = if no_limit || max_total_rows == Some(0) {
837                None
838            } else {
839                max_total_rows
840            };
841            sample::run(
842                file,
843                output,
844                dialect,
845                percent,
846                rows,
847                preserve_relations,
848                tables,
849                exclude,
850                root_tables,
851                include_global,
852                seed,
853                config,
854                effective_limit,
855                strict_fk,
856                no_schema,
857                progress,
858                dry_run,
859                json,
860            )
861        }
862        Commands::Shard {
863            file,
864            output,
865            dialect,
866            tenant_column,
867            tenant_value,
868            tenant_values,
869            root_tables,
870            include_global,
871            config,
872            max_selected_rows,
873            no_limit,
874            strict_fk,
875            no_schema,
876            progress,
877            dry_run,
878            json,
879        } => {
880            let effective_limit = if no_limit || max_selected_rows == Some(0) {
881                None
882            } else {
883                max_selected_rows
884            };
885            shard::run(
886                file,
887                output,
888                dialect,
889                tenant_column,
890                tenant_value,
891                tenant_values,
892                root_tables,
893                include_global,
894                config,
895                effective_limit,
896                strict_fk,
897                no_schema,
898                progress,
899                dry_run,
900                json,
901            )
902        }
903        Commands::Convert {
904            file,
905            output,
906            from,
907            to,
908            strict,
909            progress,
910            dry_run,
911            fail_fast,
912            json,
913        } => convert::run(
914            file, output, from, to, strict, progress, dry_run, fail_fast, json,
915        ),
916        Commands::Validate {
917            file,
918            dialect,
919            progress,
920            strict,
921            json,
922            max_rows_per_table,
923            no_limit,
924            no_fk_checks,
925            fail_fast,
926        } => {
927            let effective_limit = if no_limit || max_rows_per_table == 0 {
928                usize::MAX
929            } else {
930                max_rows_per_table
931            };
932            validate::run(
933                file,
934                dialect,
935                progress,
936                strict,
937                json,
938                effective_limit,
939                no_fk_checks,
940                fail_fast,
941            )
942        }
943        Commands::Diff {
944            old_file,
945            new_file,
946            output,
947            tables,
948            exclude,
949            schema_only,
950            data_only,
951            format,
952            dialect,
953            verbose,
954            progress,
955            max_pk_entries,
956            allow_no_pk,
957            ignore_order,
958            primary_key,
959            ignore_columns,
960        } => diff::run(
961            old_file,
962            new_file,
963            output,
964            tables,
965            exclude,
966            schema_only,
967            data_only,
968            format,
969            dialect,
970            verbose,
971            progress,
972            max_pk_entries,
973            allow_no_pk,
974            ignore_order,
975            primary_key,
976            ignore_columns,
977        ),
978        Commands::Redact {
979            file,
980            output,
981            dialect,
982            config,
983            generate_config,
984            null,
985            hash,
986            fake,
987            mask,
988            constant,
989            seed,
990            locale,
991            tables,
992            exclude,
993            strict,
994            progress,
995            dry_run,
996            json,
997            validate,
998        } => redact::run(
999            file,
1000            output,
1001            dialect,
1002            config,
1003            generate_config,
1004            null,
1005            hash,
1006            fake,
1007            mask,
1008            constant,
1009            seed,
1010            locale,
1011            tables,
1012            exclude,
1013            strict,
1014            progress,
1015            dry_run,
1016            json,
1017            validate,
1018        ),
1019        Commands::Graph {
1020            file,
1021            output,
1022            format,
1023            dialect,
1024            layout,
1025            cycles_only,
1026            table,
1027            transitive,
1028            reverse,
1029            tables,
1030            exclude,
1031            max_depth,
1032            render,
1033            progress,
1034            json,
1035        } => graph::run(
1036            file,
1037            output,
1038            format,
1039            dialect,
1040            layout,
1041            cycles_only,
1042            table,
1043            transitive,
1044            reverse,
1045            tables,
1046            exclude,
1047            max_depth,
1048            render,
1049            progress,
1050            json,
1051        ),
1052        Commands::Order {
1053            file,
1054            output,
1055            dialect,
1056            check,
1057            dry_run,
1058            reverse,
1059        } => order::run(file, output, dialect, check, dry_run, reverse),
1060        #[cfg(feature = "duckdb-query")]
1061        Commands::Query(args) => query::run(args),
1062        Commands::Schema {
1063            output,
1064            command,
1065            stdout,
1066            list,
1067        } => run_schema(output, command, stdout, list),
1068        Commands::Completions { shell } => {
1069            generate(
1070                shell,
1071                &mut Cli::command(),
1072                "sql-splitter",
1073                &mut io::stdout(),
1074            );
1075            Ok(())
1076        }
1077    }
1078}
1079
1080fn run_schema(
1081    output_dir: PathBuf,
1082    command: Option<String>,
1083    to_stdout: bool,
1084    list: bool,
1085) -> anyhow::Result<()> {
1086    use crate::json_schema;
1087    use std::fs;
1088
1089    if list {
1090        let schemas = json_schema::all_schemas();
1091        for name in schemas.keys() {
1092            println!("{}", name);
1093        }
1094        return Ok(());
1095    }
1096
1097    if let Some(cmd) = command {
1098        let schema = json_schema::get_schema(&cmd).ok_or_else(|| {
1099            anyhow::anyhow!(
1100                "Unknown command: {}. Use --list to see available schemas.",
1101                cmd
1102            )
1103        })?;
1104
1105        let json = serde_json::to_string_pretty(&schema)?;
1106
1107        if to_stdout {
1108            println!("{}", json);
1109        } else {
1110            fs::create_dir_all(&output_dir)?;
1111            let path = output_dir.join(format!("{}.schema.json", cmd));
1112            fs::write(&path, json)?;
1113            eprintln!("Wrote: {}", path.display());
1114        }
1115    } else if to_stdout {
1116        let schemas = json_schema::all_schemas();
1117        for (name, schema) in &schemas {
1118            let json = serde_json::to_string_pretty(schema)?;
1119            println!("// {}.schema.json\n{}\n", name, json);
1120        }
1121    } else {
1122        let schemas = json_schema::all_schemas();
1123        fs::create_dir_all(&output_dir)?;
1124        for (name, schema) in &schemas {
1125            let json = serde_json::to_string_pretty(schema)?;
1126            let path = output_dir.join(format!("{}.schema.json", name));
1127            fs::write(&path, json)?;
1128            eprintln!("Wrote: {}", path.display());
1129        }
1130    }
1131
1132    Ok(())
1133}
sql_splitter/cmd/mod.rs

sql_splitter/cmd/
mod.rs