//! CLI definitions for sql-splitter (`cmd` module root, `cmd/mod.rs`).
//! Declares every subcommand's arguments via clap-derive and dispatches
//! parsed commands to the matching submodule in `run`.

1pub(crate) mod analyze;
2pub(crate) mod convert;
3mod diff;
4mod glob_util;
5pub(crate) mod graph;
6pub(crate) mod merge;
7mod order;
8mod query;
9pub(crate) mod redact;
10pub(crate) mod sample;
11pub(crate) mod shard;
12pub(crate) mod split;
13pub(crate) mod validate;
14
15use clap::{CommandFactory, Parser, Subcommand, ValueHint};
16use clap_complete::{generate, Shell};
17use std::io;
18use std::path::PathBuf;
19
/// Extended help text appended after the auto-generated `--help` output.
/// Uses raw ANSI escapes (`\x1b[1m` / `\x1b[0m`) to bold the section headers.
const AFTER_HELP: &str = "\x1b[1mCommon workflows:\x1b[0m
  Split a dump into per-table files:
    sql-splitter split dump.sql -o tables/

  Create a 10% sample for development:
    sql-splitter sample dump.sql -o dev.sql --percent 10 --preserve-relations

  Convert MySQL to PostgreSQL:
    sql-splitter convert mysql.sql --to postgres -o pg.sql

  Compare two dumps for changes:
    sql-splitter diff old.sql new.sql --format sql -o migration.sql

\x1b[1mMore info:\x1b[0m
  Run 'sql-splitter <command> --help' for command-specific options.
  Documentation: https://github.com/helgesverre/sql-splitter
  Enable completions: sql-splitter completions <shell>";
37
// Top-level argument parser. `about` and `after_help` are set explicitly via
// attributes; plain `//` comments are used here (not `///`) so clap-derive
// does not pick them up as help text.
#[derive(Parser)]
#[command(name = "sql-splitter")]
#[command(author = "Helge Sverre <helge.sverre@gmail.com>")]
#[command(version)]
#[command(
    about = "High-performance CLI for splitting, merging, converting, and analyzing SQL dump files"
)]
#[command(after_help = AFTER_HELP)]
#[command(arg_required_else_help = true)]
#[command(max_term_width = 100)]
pub struct Cli {
    // The selected subcommand; `arg_required_else_help` above makes clap print
    // help (instead of erroring) when no subcommand is given.
    #[command(subcommand)]
    pub command: Commands,
}
52
// Help heading constants for consistency.
// Passed as `help_heading = …` in the `#[arg]` attributes below so that
// related flags are grouped under the same section in `--help` output.
const INPUT_OUTPUT: &str = "Input/Output";
const FILTERING: &str = "Filtering";
const MODE: &str = "Mode";
const BEHAVIOR: &str = "Behavior";
const LIMITS: &str = "Limits";
const OUTPUT_FORMAT: &str = "Output";
60
// All subcommands of the CLI. clap-derive turns the `///` docs on variants and
// fields into user-visible help text, so those lines are part of the CLI
// contract and must not be edited casually; review notes below use `//`,
// which clap ignores.
#[derive(Subcommand)]
pub enum Commands {
    /// Split a SQL dump into individual table files
    #[command(visible_alias = "sp")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter split dump.sql -o tables/
  sql-splitter split dump.sql.gz -o tables/ --tables users,orders
  sql-splitter split dump.sql -o schema/ --schema-only
  sql-splitter split \"backups/*.sql\" -o out/ --fail-fast")]
    Split {
        /// Input SQL file or glob pattern (e.g., *.sql, dumps/**/*.sql)
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output directory for split files
        #[arg(short, long, default_value = "output", value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
        output: PathBuf,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Only split specific tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        /// Only include schema statements (CREATE, ALTER, DROP)
        #[arg(long, conflicts_with = "data_only", help_heading = FILTERING)]
        schema_only: bool,

        /// Only include data statements (INSERT, COPY)
        #[arg(long, conflicts_with = "schema_only", help_heading = FILTERING)]
        data_only: bool,

        /// Show verbose output
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        verbose: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Preview without writing files
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        /// Stop on first error (for glob patterns)
        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    /// Analyze a SQL dump and display table statistics
    #[command(visible_alias = "an")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter analyze dump.sql
  sql-splitter analyze dump.sql.gz --progress
  sql-splitter analyze \"dumps/*.sql\" --json")]
    Analyze {
        /// Input SQL file or glob pattern
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Stop on first error (for glob patterns)
        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    /// Merge split SQL files back into a single dump
    #[command(visible_alias = "mg")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter merge tables/ -o restored.sql
  sql-splitter merge tables/ -o restored.sql --transaction
  sql-splitter merge tables/ -o partial.sql --tables users,orders
  sql-splitter merge tables/ -o clean.sql --exclude logs,cache")]
    Merge {
        /// Directory containing split SQL files
        #[arg(value_hint = ValueHint::DirPath, help_heading = INPUT_OUTPUT)]
        input_dir: PathBuf,

        /// Output SQL file (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        // NOTE(review): `default_value` on an `Option` means this is always
        // `Some("mysql")` after parsing; the `Option` wrapper only matches the
        // `merge::run` signature — confirm whether a plain String was intended.
        /// SQL dialect for output formatting
        #[arg(short, long, default_value = "mysql", help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Only merge specific tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        /// Exclude specific tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        /// Wrap output in BEGIN/COMMIT transaction
        #[arg(long, help_heading = BEHAVIOR)]
        transaction: bool,

        /// Omit header comments
        #[arg(long, help_heading = BEHAVIOR)]
        no_header: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Preview without writing files
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    /// Create a reduced dataset preserving FK relationships
    #[command(visible_alias = "sa")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter sample dump.sql -o dev.sql --percent 10
  sql-splitter sample dump.sql -o dev.sql --rows 1000 --preserve-relations
  sql-splitter sample dump.sql -o dev.sql --percent 5 --seed 42
  sql-splitter sample dump.sql -o dev.sql --tables users,orders --percent 20")]
    Sample {
        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output SQL file (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// YAML config file for per-table settings
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        // --percent and --rows are mutually exclusive sampling modes.
        /// Sample percentage of rows (1-100)
        #[arg(long, conflicts_with = "rows", help_heading = MODE)]
        percent: Option<u32>,

        /// Sample fixed number of rows per table
        #[arg(long, conflicts_with = "percent", help_heading = MODE)]
        rows: Option<usize>,

        /// Random seed for reproducible sampling
        #[arg(long, help_heading = MODE)]
        seed: Option<u64>,

        /// Only sample specific tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        /// Exclude specific tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        /// Tables to start sampling from (comma-separated)
        #[arg(long, help_heading = FILTERING)]
        root_tables: Option<String>,

        /// Handle lookup tables: none, lookups, all
        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
        include_global: Option<String>,

        /// Maintain FK integrity by including referenced rows
        #[arg(long, help_heading = BEHAVIOR)]
        preserve_relations: bool,

        /// Fail on FK integrity violations
        #[arg(long, help_heading = BEHAVIOR)]
        strict_fk: bool,

        /// Exclude CREATE TABLE statements from output
        #[arg(long, help_heading = BEHAVIOR)]
        no_schema: bool,

        /// Max total rows to sample (0 = unlimited)
        #[arg(long, help_heading = LIMITS)]
        max_total_rows: Option<usize>,

        /// Disable row limit
        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Preview without writing files
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    /// Extract tenant-specific data from a multi-tenant dump
    #[command(visible_alias = "sh")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter shard dump.sql -o tenant.sql --tenant-value 123
  sql-splitter shard dump.sql -o tenant.sql --tenant-column company_id --tenant-value 42
  sql-splitter shard dump.sql -o shards/ --tenant-values \"1,2,3\"")]
    Shard {
        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output SQL file or directory (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// YAML config file for table classification
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        /// Column containing tenant ID (auto-detected if omitted)
        #[arg(long, help_heading = MODE)]
        tenant_column: Option<String>,

        // --tenant-value (single) and --tenant-values (multi) are mutually exclusive.
        /// Single tenant value to extract
        #[arg(long, conflicts_with = "tenant_values", help_heading = MODE)]
        tenant_value: Option<String>,

        /// Multiple tenant values (comma-separated, outputs to directory)
        #[arg(long, conflicts_with = "tenant_value", help_heading = MODE)]
        tenant_values: Option<String>,

        /// Tables containing tenant column (comma-separated)
        #[arg(long, help_heading = FILTERING)]
        root_tables: Option<String>,

        /// Handle lookup tables: none, lookups, all
        #[arg(long, default_value = "lookups", help_heading = FILTERING)]
        include_global: Option<String>,

        /// Fail on FK integrity violations
        #[arg(long, help_heading = BEHAVIOR)]
        strict_fk: bool,

        /// Exclude CREATE TABLE statements from output
        #[arg(long, help_heading = BEHAVIOR)]
        no_schema: bool,

        /// Max rows to select (0 = unlimited)
        #[arg(long, help_heading = LIMITS)]
        max_selected_rows: Option<usize>,

        /// Disable row limit
        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Preview without writing files
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,
    },

    /// Convert a SQL dump between MySQL, PostgreSQL, and SQLite
    #[command(visible_alias = "cv")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter convert mysql.sql --to postgres -o pg.sql
  sql-splitter convert pg_dump.sql --to mysql -o mysql.sql
  sql-splitter convert dump.sql --from mysql --to sqlite -o sqlite.sql
  sql-splitter convert mysql.sql --to postgres | psql mydb")]
    Convert {
        /// Input SQL file or glob pattern
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output SQL file or directory (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// Source dialect (auto-detected if omitted)
        #[arg(long, help_heading = MODE)]
        from: Option<String>,

        // `to` is a plain String (not Option), so clap requires --to.
        /// Target dialect (required)
        #[arg(long, help_heading = MODE)]
        to: String,

        /// Fail on unsupported features instead of warning
        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Preview without writing files
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        /// Stop on first error (for glob patterns)
        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,
    },

    /// Validate SQL dump syntax, encoding, and data integrity
    #[command(visible_alias = "val")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter validate dump.sql
  sql-splitter validate dump.sql --strict
  sql-splitter validate \"dumps/*.sql\" --json --fail-fast
  sql-splitter validate dump.sql --no-fk-checks")]
    Validate {
        /// Input SQL file or glob pattern
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Treat warnings as errors (exit code 1)
        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        /// Skip PK/FK data integrity checks
        #[arg(long, help_heading = BEHAVIOR)]
        no_fk_checks: bool,

        /// Stop on first error (for glob patterns)
        #[arg(long, help_heading = BEHAVIOR)]
        fail_fast: bool,

        /// Max rows per table for PK/FK checks (0 = unlimited)
        #[arg(long, default_value = "1000000", help_heading = LIMITS)]
        max_rows_per_table: usize,

        /// Disable row limit for PK/FK checks
        #[arg(long, help_heading = LIMITS)]
        no_limit: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,
    },

    /// Compare two SQL dumps and report schema + data differences
    #[command(visible_alias = "df")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter diff old.sql new.sql
  sql-splitter diff old.sql new.sql --schema-only
  sql-splitter diff old.sql new.sql --format sql -o migration.sql
  sql-splitter diff old.sql new.sql --verbose --ignore-columns \"*.updated_at\"
  sql-splitter diff old.sql new.sql --primary-key logs:timestamp+message")]
    Diff {
        /// Original SQL dump file
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        old_file: PathBuf,

        /// Updated SQL dump file
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        new_file: PathBuf,

        /// Output file (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Only compare these tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        /// Exclude these tables (comma-separated)
        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        /// Ignore columns matching glob patterns (e.g., *.updated_at)
        #[arg(long, help_heading = FILTERING)]
        ignore_columns: Option<String>,

        /// Compare schema only, skip data
        #[arg(long, conflicts_with = "data_only", help_heading = MODE)]
        schema_only: bool,

        /// Compare data only, skip schema
        #[arg(long, conflicts_with = "schema_only", help_heading = MODE)]
        data_only: bool,

        /// Override primary key (format: table:col1+col2,table2:col)
        #[arg(long, help_heading = MODE)]
        primary_key: Option<String>,

        /// Compare tables without PK using all columns as key
        #[arg(long, help_heading = BEHAVIOR)]
        allow_no_pk: bool,

        /// Ignore column order differences in schema
        #[arg(long, help_heading = BEHAVIOR)]
        ignore_order: bool,

        /// Max PK entries per table (limits memory)
        #[arg(long, default_value = "10000000", help_heading = LIMITS)]
        max_pk_entries: usize,

        /// Output format: text, json, sql
        #[arg(short, long, default_value = "text", help_heading = OUTPUT_FORMAT)]
        format: Option<String>,

        /// Show sample PK values for changes
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        verbose: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,
    },

    /// Redact sensitive data (PII) from SQL dumps
    #[command(visible_alias = "rd")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter redact dump.sql -o safe.sql --config redact.yaml
  sql-splitter redact dump.sql -o safe.sql --null \"*.ssn\" --hash \"*.email\"
  sql-splitter redact dump.sql --generate-config -o redact.yaml
  sql-splitter redact dump.sql -o safe.sql --config redact.yaml --seed 42")]
    Redact {
        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output file (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// YAML config file for redaction rules
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        config: Option<PathBuf>,

        /// Generate annotated YAML config by analyzing input file
        #[arg(long, help_heading = MODE)]
        generate_config: bool,

        // The five inline-strategy flags below all take comma-separated glob
        // patterns (value_delimiter = ',') and share the "Inline Strategies"
        // help heading.
        /// Columns to set to NULL (glob patterns, comma-separated)
        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        null: Vec<String>,

        /// Columns to hash with SHA256 (glob patterns)
        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        hash: Vec<String>,

        /// Columns to replace with fake data (glob patterns)
        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        fake: Vec<String>,

        /// Columns to mask (format: pattern=column, e.g., "****-XXXX=*.credit_card")
        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        mask: Vec<String>,

        /// Column=value pairs for constant replacement
        #[arg(long, value_delimiter = ',', help_heading = "Inline Strategies")]
        constant: Vec<String>,

        /// Random seed for reproducible redaction
        #[arg(long, help_heading = MODE)]
        seed: Option<u64>,

        /// Locale for fake data generation (default: en)
        #[arg(long, default_value = "en", help_heading = MODE)]
        locale: String,

        /// Only redact specific tables (comma-separated)
        #[arg(short, long, value_delimiter = ',', help_heading = FILTERING)]
        tables: Vec<String>,

        // Short flag is -x (not -e) for this subcommand.
        /// Exclude specific tables (comma-separated)
        #[arg(short = 'x', long, value_delimiter = ',', help_heading = FILTERING)]
        exclude: Vec<String>,

        /// Fail on warnings (e.g., unsupported locale)
        #[arg(long, help_heading = BEHAVIOR)]
        strict: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Preview without writing files
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,

        /// Validate config only, don't process
        #[arg(long, help_heading = BEHAVIOR)]
        validate: bool,
    },

    /// Generate Entity Relationship Diagram (ERD) from SQL dump
    #[command(visible_alias = "gr")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter graph dump.sql -o schema.html
  sql-splitter graph dump.sql -o schema.mmd --format mermaid
  sql-splitter graph dump.sql -o schema.png --render
  sql-splitter graph dump.sql --cycles-only
  sql-splitter graph dump.sql --table users --transitive")]
    Graph {
        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output file (.html, .dot, .mmd, .json, .png, .svg)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// Output format: html, dot, mermaid, json
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        format: Option<String>,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Layout direction: lr (left-right), tb (top-bottom)
        #[arg(long, default_value = "lr", help_heading = OUTPUT_FORMAT)]
        layout: Option<String>,

        /// Show only tables involved in circular dependencies
        #[arg(long, help_heading = FILTERING)]
        cycles_only: bool,

        // `--table` (singular, long-only) focuses on one table;
        // `-t/--tables` (plural, below) filters the whole graph.
        /// Focus on a specific table
        #[arg(long, help_heading = FILTERING)]
        table: Option<String>,

        /// Show transitive dependencies (parents of parents)
        #[arg(long, help_heading = FILTERING)]
        transitive: bool,

        /// Show reverse dependencies (who references this table)
        #[arg(long, help_heading = FILTERING)]
        reverse: bool,

        /// Only include these tables (comma-separated, supports globs)
        #[arg(short, long, help_heading = FILTERING)]
        tables: Option<String>,

        /// Exclude these tables (comma-separated, supports globs)
        #[arg(short, long, help_heading = FILTERING)]
        exclude: Option<String>,

        /// Maximum depth for transitive dependencies
        #[arg(long, help_heading = FILTERING)]
        max_depth: Option<usize>,

        /// Render DOT to PNG/SVG using Graphviz
        #[arg(long, help_heading = BEHAVIOR)]
        render: bool,

        /// Show progress bar
        #[arg(short, long, help_heading = OUTPUT_FORMAT)]
        progress: bool,

        /// Output results as JSON
        #[arg(long, help_heading = OUTPUT_FORMAT)]
        json: bool,
    },

    /// Output SQL dump with tables in topological FK order
    #[command(visible_alias = "ord")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter order dump.sql -o ordered.sql
  sql-splitter order dump.sql --check
  sql-splitter order dump.sql --dry-run
  sql-splitter order dump.sql --reverse")]
    Order {
        /// Input SQL file (supports .gz, .bz2, .xz, .zst)
        #[arg(value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        file: PathBuf,

        /// Output file (default: stdout)
        #[arg(short, long, value_hint = ValueHint::FilePath, help_heading = INPUT_OUTPUT)]
        output: Option<PathBuf>,

        /// SQL dialect: mysql, postgres, sqlite, mssql (auto-detected if omitted)
        #[arg(short, long, help_heading = INPUT_OUTPUT)]
        dialect: Option<String>,

        /// Verify ordering without writing output
        #[arg(long, help_heading = BEHAVIOR)]
        check: bool,

        /// Show order without rewriting the file
        #[arg(long, help_heading = BEHAVIOR)]
        dry_run: bool,

        /// Order children before parents (for DROP operations)
        #[arg(long, help_heading = BEHAVIOR)]
        reverse: bool,
    },

    /// Query SQL dumps using DuckDB's analytical engine
    #[command(visible_alias = "qy")]
    #[command(after_help = "\x1b[1mExamples:\x1b[0m
  sql-splitter query dump.sql \"SELECT COUNT(*) FROM users\"
  sql-splitter query dump.sql \"SELECT * FROM orders WHERE total > 100\" -f json
  sql-splitter query dump.sql \"SELECT * FROM users LIMIT 10\" -o results.csv -f csv
  sql-splitter query dump.sql --interactive
  sql-splitter query huge.sql \"SELECT ...\" --disk
  sql-splitter query dump.sql \"SELECT ...\" --cache
  sql-splitter query --list-cache")]
    // Unlike the other variants, Query's arguments live in `cmd/query.rs`
    // as a flattened `QueryArgs` struct.
    Query(query::QueryArgs),

    /// Generate JSON schemas for --json output types (developer tool)
    // Hidden from `--help` via `hide = true`; internal developer tooling.
    #[command(hide = true)]
    Schema {
        /// Output directory for schema files
        #[arg(short, long, default_value = "schemas", value_hint = ValueHint::DirPath)]
        output: PathBuf,

        /// Generate schema for a specific command only
        #[arg(short, long)]
        command: Option<String>,

        /// Print schemas to stdout instead of writing files
        #[arg(long)]
        stdout: bool,

        /// List available schema names
        #[arg(long)]
        list: bool,
    },

    /// Generate shell completion scripts
    #[command(after_help = "\x1b[1mInstallation:\x1b[0m
  Bash:
    sql-splitter completions bash > /etc/bash_completion.d/sql-splitter
    # or: sql-splitter completions bash >> ~/.bashrc

  Zsh:
    sql-splitter completions zsh > \"${fpath[1]}/_sql-splitter\"
    # or for oh-my-zsh: sql-splitter completions zsh > ~/.oh-my-zsh/completions/_sql-splitter

  Fish:
    sql-splitter completions fish > ~/.config/fish/completions/sql-splitter.fish

  PowerShell:
    sql-splitter completions powershell >> $PROFILE")]
    Completions {
        /// Target shell
        #[arg(value_enum)]
        shell: Shell,
    },
}
755
756pub fn run(cli: Cli) -> anyhow::Result<()> {
757    match cli.command {
758        Commands::Split {
759            file,
760            output,
761            dialect,
762            verbose,
763            dry_run,
764            progress,
765            tables,
766            schema_only,
767            data_only,
768            fail_fast,
769            json,
770        } => split::run(
771            file,
772            output,
773            dialect,
774            verbose,
775            dry_run,
776            progress,
777            tables,
778            schema_only,
779            data_only,
780            fail_fast,
781            json,
782        ),
783        Commands::Analyze {
784            file,
785            dialect,
786            progress,
787            fail_fast,
788            json,
789        } => analyze::run(file, dialect, progress, fail_fast, json),
790        Commands::Merge {
791            input_dir,
792            output,
793            dialect,
794            tables,
795            exclude,
796            transaction,
797            no_header,
798            progress,
799            dry_run,
800            json,
801        } => merge::run(
802            input_dir,
803            output,
804            dialect,
805            tables,
806            exclude,
807            transaction,
808            no_header,
809            progress,
810            dry_run,
811            json,
812        ),
813        Commands::Sample {
814            file,
815            output,
816            dialect,
817            percent,
818            rows,
819            preserve_relations,
820            tables,
821            exclude,
822            root_tables,
823            include_global,
824            seed,
825            config,
826            max_total_rows,
827            no_limit,
828            strict_fk,
829            no_schema,
830            progress,
831            dry_run,
832            json,
833        } => {
834            let effective_limit = if no_limit || max_total_rows == Some(0) {
835                None
836            } else {
837                max_total_rows
838            };
839            sample::run(
840                file,
841                output,
842                dialect,
843                percent,
844                rows,
845                preserve_relations,
846                tables,
847                exclude,
848                root_tables,
849                include_global,
850                seed,
851                config,
852                effective_limit,
853                strict_fk,
854                no_schema,
855                progress,
856                dry_run,
857                json,
858            )
859        }
860        Commands::Shard {
861            file,
862            output,
863            dialect,
864            tenant_column,
865            tenant_value,
866            tenant_values,
867            root_tables,
868            include_global,
869            config,
870            max_selected_rows,
871            no_limit,
872            strict_fk,
873            no_schema,
874            progress,
875            dry_run,
876            json,
877        } => {
878            let effective_limit = if no_limit || max_selected_rows == Some(0) {
879                None
880            } else {
881                max_selected_rows
882            };
883            shard::run(
884                file,
885                output,
886                dialect,
887                tenant_column,
888                tenant_value,
889                tenant_values,
890                root_tables,
891                include_global,
892                config,
893                effective_limit,
894                strict_fk,
895                no_schema,
896                progress,
897                dry_run,
898                json,
899            )
900        }
901        Commands::Convert {
902            file,
903            output,
904            from,
905            to,
906            strict,
907            progress,
908            dry_run,
909            fail_fast,
910            json,
911        } => convert::run(
912            file, output, from, to, strict, progress, dry_run, fail_fast, json,
913        ),
914        Commands::Validate {
915            file,
916            dialect,
917            progress,
918            strict,
919            json,
920            max_rows_per_table,
921            no_limit,
922            no_fk_checks,
923            fail_fast,
924        } => {
925            let effective_limit = if no_limit || max_rows_per_table == 0 {
926                usize::MAX
927            } else {
928                max_rows_per_table
929            };
930            validate::run(
931                file,
932                dialect,
933                progress,
934                strict,
935                json,
936                effective_limit,
937                no_fk_checks,
938                fail_fast,
939            )
940        }
941        Commands::Diff {
942            old_file,
943            new_file,
944            output,
945            tables,
946            exclude,
947            schema_only,
948            data_only,
949            format,
950            dialect,
951            verbose,
952            progress,
953            max_pk_entries,
954            allow_no_pk,
955            ignore_order,
956            primary_key,
957            ignore_columns,
958        } => diff::run(
959            old_file,
960            new_file,
961            output,
962            tables,
963            exclude,
964            schema_only,
965            data_only,
966            format,
967            dialect,
968            verbose,
969            progress,
970            max_pk_entries,
971            allow_no_pk,
972            ignore_order,
973            primary_key,
974            ignore_columns,
975        ),
976        Commands::Redact {
977            file,
978            output,
979            dialect,
980            config,
981            generate_config,
982            null,
983            hash,
984            fake,
985            mask,
986            constant,
987            seed,
988            locale,
989            tables,
990            exclude,
991            strict,
992            progress,
993            dry_run,
994            json,
995            validate,
996        } => redact::run(
997            file,
998            output,
999            dialect,
1000            config,
1001            generate_config,
1002            null,
1003            hash,
1004            fake,
1005            mask,
1006            constant,
1007            seed,
1008            locale,
1009            tables,
1010            exclude,
1011            strict,
1012            progress,
1013            dry_run,
1014            json,
1015            validate,
1016        ),
1017        Commands::Graph {
1018            file,
1019            output,
1020            format,
1021            dialect,
1022            layout,
1023            cycles_only,
1024            table,
1025            transitive,
1026            reverse,
1027            tables,
1028            exclude,
1029            max_depth,
1030            render,
1031            progress,
1032            json,
1033        } => graph::run(
1034            file,
1035            output,
1036            format,
1037            dialect,
1038            layout,
1039            cycles_only,
1040            table,
1041            transitive,
1042            reverse,
1043            tables,
1044            exclude,
1045            max_depth,
1046            render,
1047            progress,
1048            json,
1049        ),
1050        Commands::Order {
1051            file,
1052            output,
1053            dialect,
1054            check,
1055            dry_run,
1056            reverse,
1057        } => order::run(file, output, dialect, check, dry_run, reverse),
1058        Commands::Query(args) => query::run(args),
1059        Commands::Schema {
1060            output,
1061            command,
1062            stdout,
1063            list,
1064        } => run_schema(output, command, stdout, list),
1065        Commands::Completions { shell } => {
1066            generate(
1067                shell,
1068                &mut Cli::command(),
1069                "sql-splitter",
1070                &mut io::stdout(),
1071            );
1072            Ok(())
1073        }
1074    }
1075}
1076
1077fn run_schema(
1078    output_dir: PathBuf,
1079    command: Option<String>,
1080    to_stdout: bool,
1081    list: bool,
1082) -> anyhow::Result<()> {
1083    use crate::json_schema;
1084    use std::fs;
1085
1086    let schemas = json_schema::all_schemas();
1087
1088    if list {
1089        for name in schemas.keys() {
1090            println!("{}", name);
1091        }
1092        return Ok(());
1093    }
1094
1095    if let Some(cmd) = command {
1096        let schema = schemas.get(cmd.as_str()).ok_or_else(|| {
1097            anyhow::anyhow!(
1098                "Unknown command: {}. Use --list to see available schemas.",
1099                cmd
1100            )
1101        })?;
1102
1103        let json = serde_json::to_string_pretty(schema)?;
1104
1105        if to_stdout {
1106            println!("{}", json);
1107        } else {
1108            fs::create_dir_all(&output_dir)?;
1109            let path = output_dir.join(format!("{}.schema.json", cmd));
1110            fs::write(&path, json)?;
1111            eprintln!("Wrote: {}", path.display());
1112        }
1113    } else if to_stdout {
1114        for (name, schema) in &schemas {
1115            let json = serde_json::to_string_pretty(schema)?;
1116            println!("// {}.schema.json\n{}\n", name, json);
1117        }
1118    } else {
1119        fs::create_dir_all(&output_dir)?;
1120        for (name, schema) in &schemas {
1121            let json = serde_json::to_string_pretty(schema)?;
1122            let path = output_dir.join(format!("{}.schema.json", name));
1123            fs::write(&path, json)?;
1124            eprintln!("Wrote: {}", path.display());
1125        }
1126    }
1127
1128    Ok(())
1129}